{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 50,
  "global_step": 8124,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00036927621861152144,
      "grad_norm": 0.8675795197486877,
      "learning_rate": 4e-05,
      "loss": 2.3283,
      "step": 1
    },
    {
      "epoch": 0.0007385524372230429,
      "grad_norm": 0.825646698474884,
      "learning_rate": 8e-05,
      "loss": 2.4218,
      "step": 2
    },
    {
      "epoch": 0.0011078286558345643,
      "grad_norm": 0.9647901654243469,
      "learning_rate": 0.00012,
      "loss": 2.4091,
      "step": 3
    },
    {
      "epoch": 0.0014771048744460858,
      "grad_norm": 0.9235298037528992,
      "learning_rate": 0.00016,
      "loss": 2.4576,
      "step": 4
    },
    {
      "epoch": 0.001846381093057607,
      "grad_norm": 0.7539268732070923,
      "learning_rate": 0.0002,
      "loss": 2.1086,
      "step": 5
    },
    {
      "epoch": 0.0022156573116691287,
      "grad_norm": 0.897127091884613,
      "learning_rate": 0.0001999753664244365,
      "loss": 1.8471,
      "step": 6
    },
    {
      "epoch": 0.00258493353028065,
      "grad_norm": 1.0578811168670654,
      "learning_rate": 0.000199950732848873,
      "loss": 1.4978,
      "step": 7
    },
    {
      "epoch": 0.0029542097488921715,
      "grad_norm": 1.1795432567596436,
      "learning_rate": 0.00019992609927330953,
      "loss": 1.2179,
      "step": 8
    },
    {
      "epoch": 0.0033234859675036928,
      "grad_norm": 1.1029233932495117,
      "learning_rate": 0.00019990146569774604,
      "loss": 1.0425,
      "step": 9
    },
    {
      "epoch": 0.003692762186115214,
      "grad_norm": 0.9418121576309204,
      "learning_rate": 0.00019987683212218253,
      "loss": 0.7962,
      "step": 10
    },
    {
      "epoch": 0.004062038404726735,
      "grad_norm": 0.5863303542137146,
      "learning_rate": 0.00019985219854661904,
      "loss": 0.8584,
      "step": 11
    },
    {
      "epoch": 0.004431314623338257,
      "grad_norm": 0.5713561177253723,
      "learning_rate": 0.00019982756497105556,
      "loss": 0.7218,
      "step": 12
    },
    {
      "epoch": 0.0048005908419497785,
      "grad_norm": 0.39308950304985046,
      "learning_rate": 0.00019980293139549207,
      "loss": 0.6724,
      "step": 13
    },
    {
      "epoch": 0.0051698670605613,
      "grad_norm": 0.43338122963905334,
      "learning_rate": 0.00019977829781992856,
      "loss": 0.6111,
      "step": 14
    },
    {
      "epoch": 0.005539143279172821,
      "grad_norm": 0.38551896810531616,
      "learning_rate": 0.00019975366424436507,
      "loss": 0.6062,
      "step": 15
    },
    {
      "epoch": 0.005908419497784343,
      "grad_norm": 0.39907607436180115,
      "learning_rate": 0.00019972903066880156,
      "loss": 0.5282,
      "step": 16
    },
    {
      "epoch": 0.006277695716395864,
      "grad_norm": 0.4794689416885376,
      "learning_rate": 0.0001997043970932381,
      "loss": 0.5971,
      "step": 17
    },
    {
      "epoch": 0.0066469719350073855,
      "grad_norm": 0.8089517951011658,
      "learning_rate": 0.0001996797635176746,
      "loss": 0.6808,
      "step": 18
    },
    {
      "epoch": 0.007016248153618907,
      "grad_norm": 0.5165871977806091,
      "learning_rate": 0.0001996551299421111,
      "loss": 0.585,
      "step": 19
    },
    {
      "epoch": 0.007385524372230428,
      "grad_norm": 0.5364779829978943,
      "learning_rate": 0.0001996304963665476,
      "loss": 0.5776,
      "step": 20
    },
    {
      "epoch": 0.00775480059084195,
      "grad_norm": 0.509082019329071,
      "learning_rate": 0.0001996058627909841,
      "loss": 0.6039,
      "step": 21
    },
    {
      "epoch": 0.00812407680945347,
      "grad_norm": 0.5860568881034851,
      "learning_rate": 0.00019958122921542062,
      "loss": 0.5506,
      "step": 22
    },
    {
      "epoch": 0.008493353028064993,
      "grad_norm": 0.5222316384315491,
      "learning_rate": 0.00019955659563985714,
      "loss": 0.5856,
      "step": 23
    },
    {
      "epoch": 0.008862629246676515,
      "grad_norm": 0.5385368466377258,
      "learning_rate": 0.00019953196206429363,
      "loss": 0.5881,
      "step": 24
    },
    {
      "epoch": 0.009231905465288036,
      "grad_norm": 0.5340928435325623,
      "learning_rate": 0.00019950732848873014,
      "loss": 0.4904,
      "step": 25
    },
    {
      "epoch": 0.009601181683899557,
      "grad_norm": 0.5609019994735718,
      "learning_rate": 0.00019948269491316665,
      "loss": 0.4813,
      "step": 26
    },
    {
      "epoch": 0.009970457902511078,
      "grad_norm": 0.5047788023948669,
      "learning_rate": 0.00019945806133760317,
      "loss": 0.5821,
      "step": 27
    },
    {
      "epoch": 0.0103397341211226,
      "grad_norm": 0.4963848888874054,
      "learning_rate": 0.00019943342776203966,
      "loss": 0.6033,
      "step": 28
    },
    {
      "epoch": 0.01070901033973412,
      "grad_norm": 0.39464226365089417,
      "learning_rate": 0.00019940879418647617,
      "loss": 0.4476,
      "step": 29
    },
    {
      "epoch": 0.011078286558345642,
      "grad_norm": 0.3563077449798584,
      "learning_rate": 0.00019938416061091269,
      "loss": 0.4926,
      "step": 30
    },
    {
      "epoch": 0.011447562776957163,
      "grad_norm": 0.4271666407585144,
      "learning_rate": 0.0001993595270353492,
      "loss": 0.5853,
      "step": 31
    },
    {
      "epoch": 0.011816838995568686,
      "grad_norm": 0.40097787976264954,
      "learning_rate": 0.0001993348934597857,
      "loss": 0.5056,
      "step": 32
    },
    {
      "epoch": 0.012186115214180207,
      "grad_norm": 0.49196794629096985,
      "learning_rate": 0.0001993102598842222,
      "loss": 0.4884,
      "step": 33
    },
    {
      "epoch": 0.012555391432791729,
      "grad_norm": 0.3631584346294403,
      "learning_rate": 0.0001992856263086587,
      "loss": 0.4672,
      "step": 34
    },
    {
      "epoch": 0.01292466765140325,
      "grad_norm": 0.43581250309944153,
      "learning_rate": 0.00019926099273309523,
      "loss": 0.4092,
      "step": 35
    },
    {
      "epoch": 0.013293943870014771,
      "grad_norm": 0.3988689184188843,
      "learning_rate": 0.00019923635915753172,
      "loss": 0.5401,
      "step": 36
    },
    {
      "epoch": 0.013663220088626292,
      "grad_norm": 0.36282041668891907,
      "learning_rate": 0.00019921172558196823,
      "loss": 0.4813,
      "step": 37
    },
    {
      "epoch": 0.014032496307237814,
      "grad_norm": 0.4121813178062439,
      "learning_rate": 0.00019918709200640472,
      "loss": 0.4544,
      "step": 38
    },
    {
      "epoch": 0.014401772525849335,
      "grad_norm": 0.3579091727733612,
      "learning_rate": 0.00019916245843084124,
      "loss": 0.4316,
      "step": 39
    },
    {
      "epoch": 0.014771048744460856,
      "grad_norm": 0.4818468689918518,
      "learning_rate": 0.00019913782485527775,
      "loss": 0.5688,
      "step": 40
    },
    {
      "epoch": 0.015140324963072379,
      "grad_norm": 0.444948673248291,
      "learning_rate": 0.00019911319127971427,
      "loss": 0.5442,
      "step": 41
    },
    {
      "epoch": 0.0155096011816839,
      "grad_norm": 0.35346049070358276,
      "learning_rate": 0.00019908855770415075,
      "loss": 0.3857,
      "step": 42
    },
    {
      "epoch": 0.01587887740029542,
      "grad_norm": 0.4043765664100647,
      "learning_rate": 0.00019906392412858727,
      "loss": 0.5172,
      "step": 43
    },
    {
      "epoch": 0.01624815361890694,
      "grad_norm": 0.47345271706581116,
      "learning_rate": 0.00019903929055302378,
      "loss": 0.491,
      "step": 44
    },
    {
      "epoch": 0.016617429837518464,
      "grad_norm": 0.3552895784378052,
      "learning_rate": 0.0001990146569774603,
      "loss": 0.5305,
      "step": 45
    },
    {
      "epoch": 0.016986706056129987,
      "grad_norm": 0.36031079292297363,
      "learning_rate": 0.00019899002340189678,
      "loss": 0.4335,
      "step": 46
    },
    {
      "epoch": 0.017355982274741506,
      "grad_norm": 0.37877610325813293,
      "learning_rate": 0.0001989653898263333,
      "loss": 0.4634,
      "step": 47
    },
    {
      "epoch": 0.01772525849335303,
      "grad_norm": 0.3502812385559082,
      "learning_rate": 0.0001989407562507698,
      "loss": 0.4983,
      "step": 48
    },
    {
      "epoch": 0.01809453471196455,
      "grad_norm": 0.3954726457595825,
      "learning_rate": 0.00019891612267520633,
      "loss": 0.5041,
      "step": 49
    },
    {
      "epoch": 0.01846381093057607,
      "grad_norm": 0.35697075724601746,
      "learning_rate": 0.00019889148909964282,
      "loss": 0.5117,
      "step": 50
    },
    {
      "epoch": 0.01846381093057607,
      "eval_loss": 0.45282307267189026,
      "eval_runtime": 6.8472,
      "eval_samples_per_second": 7.302,
      "eval_steps_per_second": 1.022,
      "step": 50
    },
    {
      "epoch": 0.01883308714918759,
      "grad_norm": 0.36884281039237976,
      "learning_rate": 0.00019886685552407933,
      "loss": 0.4023,
      "step": 51
    },
    {
      "epoch": 0.019202363367799114,
      "grad_norm": 0.4169233739376068,
      "learning_rate": 0.00019884222194851582,
      "loss": 0.5357,
      "step": 52
    },
    {
      "epoch": 0.019571639586410634,
      "grad_norm": 0.45588135719299316,
      "learning_rate": 0.00019881758837295233,
      "loss": 0.5044,
      "step": 53
    },
    {
      "epoch": 0.019940915805022157,
      "grad_norm": 0.34777161478996277,
      "learning_rate": 0.00019879295479738885,
      "loss": 0.4362,
      "step": 54
    },
    {
      "epoch": 0.02031019202363368,
      "grad_norm": 0.29894888401031494,
      "learning_rate": 0.00019876832122182536,
      "loss": 0.395,
      "step": 55
    },
    {
      "epoch": 0.0206794682422452,
      "grad_norm": 0.34574243426322937,
      "learning_rate": 0.00019874368764626185,
      "loss": 0.4749,
      "step": 56
    },
    {
      "epoch": 0.021048744460856722,
      "grad_norm": 0.33335548639297485,
      "learning_rate": 0.00019871905407069836,
      "loss": 0.4977,
      "step": 57
    },
    {
      "epoch": 0.02141802067946824,
      "grad_norm": 0.3539446294307709,
      "learning_rate": 0.00019869442049513488,
      "loss": 0.4267,
      "step": 58
    },
    {
      "epoch": 0.021787296898079764,
      "grad_norm": 0.42208486795425415,
      "learning_rate": 0.0001986697869195714,
      "loss": 0.5042,
      "step": 59
    },
    {
      "epoch": 0.022156573116691284,
      "grad_norm": 0.740729808807373,
      "learning_rate": 0.00019864515334400788,
      "loss": 0.5147,
      "step": 60
    },
    {
      "epoch": 0.022525849335302807,
      "grad_norm": 0.40567144751548767,
      "learning_rate": 0.0001986205197684444,
      "loss": 0.4998,
      "step": 61
    },
    {
      "epoch": 0.022895125553914326,
      "grad_norm": 0.3556930720806122,
      "learning_rate": 0.0001985958861928809,
      "loss": 0.4965,
      "step": 62
    },
    {
      "epoch": 0.02326440177252585,
      "grad_norm": 0.3750048875808716,
      "learning_rate": 0.00019857125261731743,
      "loss": 0.5568,
      "step": 63
    },
    {
      "epoch": 0.023633677991137372,
      "grad_norm": 0.29892468452453613,
      "learning_rate": 0.0001985466190417539,
      "loss": 0.431,
      "step": 64
    },
    {
      "epoch": 0.024002954209748892,
      "grad_norm": 0.4254045784473419,
      "learning_rate": 0.00019852198546619043,
      "loss": 0.4617,
      "step": 65
    },
    {
      "epoch": 0.024372230428360415,
      "grad_norm": 0.37175410985946655,
      "learning_rate": 0.00019849735189062691,
      "loss": 0.4988,
      "step": 66
    },
    {
      "epoch": 0.024741506646971934,
      "grad_norm": 0.37746497988700867,
      "learning_rate": 0.00019847271831506346,
      "loss": 0.5082,
      "step": 67
    },
    {
      "epoch": 0.025110782865583457,
      "grad_norm": 0.32295873761177063,
      "learning_rate": 0.00019844808473949994,
      "loss": 0.3769,
      "step": 68
    },
    {
      "epoch": 0.025480059084194977,
      "grad_norm": 0.34700700640678406,
      "learning_rate": 0.00019842345116393646,
      "loss": 0.4392,
      "step": 69
    },
    {
      "epoch": 0.0258493353028065,
      "grad_norm": 0.47979068756103516,
      "learning_rate": 0.00019839881758837295,
      "loss": 0.5096,
      "step": 70
    },
    {
      "epoch": 0.02621861152141802,
      "grad_norm": 0.3435942828655243,
      "learning_rate": 0.00019837418401280946,
      "loss": 0.4908,
      "step": 71
    },
    {
      "epoch": 0.026587887740029542,
      "grad_norm": 0.43033188581466675,
      "learning_rate": 0.00019834955043724598,
      "loss": 0.4926,
      "step": 72
    },
    {
      "epoch": 0.026957163958641065,
      "grad_norm": 0.32151058316230774,
      "learning_rate": 0.0001983249168616825,
      "loss": 0.4244,
      "step": 73
    },
    {
      "epoch": 0.027326440177252585,
      "grad_norm": 0.37814804911613464,
      "learning_rate": 0.00019830028328611898,
      "loss": 0.55,
      "step": 74
    },
    {
      "epoch": 0.027695716395864108,
      "grad_norm": 0.39102527499198914,
      "learning_rate": 0.0001982756497105555,
      "loss": 0.5034,
      "step": 75
    },
    {
      "epoch": 0.028064992614475627,
      "grad_norm": 0.4471779763698578,
      "learning_rate": 0.000198251016134992,
      "loss": 0.543,
      "step": 76
    },
    {
      "epoch": 0.02843426883308715,
      "grad_norm": 0.4095175266265869,
      "learning_rate": 0.00019822638255942852,
      "loss": 0.5311,
      "step": 77
    },
    {
      "epoch": 0.02880354505169867,
      "grad_norm": 0.3491657078266144,
      "learning_rate": 0.000198201748983865,
      "loss": 0.5068,
      "step": 78
    },
    {
      "epoch": 0.029172821270310192,
      "grad_norm": 0.3270619511604309,
      "learning_rate": 0.00019817711540830152,
      "loss": 0.44,
      "step": 79
    },
    {
      "epoch": 0.029542097488921712,
      "grad_norm": 0.30704453587532043,
      "learning_rate": 0.000198152481832738,
      "loss": 0.4399,
      "step": 80
    },
    {
      "epoch": 0.029911373707533235,
      "grad_norm": 0.34257006645202637,
      "learning_rate": 0.00019812784825717455,
      "loss": 0.4092,
      "step": 81
    },
    {
      "epoch": 0.030280649926144758,
      "grad_norm": 0.3198525011539459,
      "learning_rate": 0.00019810321468161104,
      "loss": 0.4801,
      "step": 82
    },
    {
      "epoch": 0.030649926144756277,
      "grad_norm": 0.30465447902679443,
      "learning_rate": 0.00019807858110604756,
      "loss": 0.416,
      "step": 83
    },
    {
      "epoch": 0.0310192023633678,
      "grad_norm": 0.32561299204826355,
      "learning_rate": 0.00019805394753048404,
      "loss": 0.514,
      "step": 84
    },
    {
      "epoch": 0.03138847858197932,
      "grad_norm": 0.3592464327812195,
      "learning_rate": 0.00019802931395492056,
      "loss": 0.4441,
      "step": 85
    },
    {
      "epoch": 0.03175775480059084,
      "grad_norm": 0.36717188358306885,
      "learning_rate": 0.00019800468037935707,
      "loss": 0.4374,
      "step": 86
    },
    {
      "epoch": 0.03212703101920236,
      "grad_norm": 0.2966742217540741,
      "learning_rate": 0.0001979800468037936,
      "loss": 0.38,
      "step": 87
    },
    {
      "epoch": 0.03249630723781388,
      "grad_norm": 0.35723623633384705,
      "learning_rate": 0.00019795541322823007,
      "loss": 0.4626,
      "step": 88
    },
    {
      "epoch": 0.03286558345642541,
      "grad_norm": 0.31849217414855957,
      "learning_rate": 0.0001979307796526666,
      "loss": 0.387,
      "step": 89
    },
    {
      "epoch": 0.03323485967503693,
      "grad_norm": 0.32004714012145996,
      "learning_rate": 0.0001979061460771031,
      "loss": 0.4019,
      "step": 90
    },
    {
      "epoch": 0.03360413589364845,
      "grad_norm": 0.3383117616176605,
      "learning_rate": 0.00019788151250153962,
      "loss": 0.4433,
      "step": 91
    },
    {
      "epoch": 0.033973412112259974,
      "grad_norm": 0.3656468093395233,
      "learning_rate": 0.0001978568789259761,
      "loss": 0.4419,
      "step": 92
    },
    {
      "epoch": 0.03434268833087149,
      "grad_norm": 0.3365080654621124,
      "learning_rate": 0.00019783224535041262,
      "loss": 0.431,
      "step": 93
    },
    {
      "epoch": 0.03471196454948301,
      "grad_norm": 0.4304644465446472,
      "learning_rate": 0.00019780761177484913,
      "loss": 0.4476,
      "step": 94
    },
    {
      "epoch": 0.03508124076809453,
      "grad_norm": 0.43384629487991333,
      "learning_rate": 0.00019778297819928565,
      "loss": 0.4424,
      "step": 95
    },
    {
      "epoch": 0.03545051698670606,
      "grad_norm": 0.3593868315219879,
      "learning_rate": 0.00019775834462372214,
      "loss": 0.462,
      "step": 96
    },
    {
      "epoch": 0.03581979320531758,
      "grad_norm": 0.307407945394516,
      "learning_rate": 0.00019773371104815865,
      "loss": 0.4392,
      "step": 97
    },
    {
      "epoch": 0.0361890694239291,
      "grad_norm": 0.36906832456588745,
      "learning_rate": 0.00019770907747259514,
      "loss": 0.5025,
      "step": 98
    },
    {
      "epoch": 0.03655834564254062,
      "grad_norm": 0.28905150294303894,
      "learning_rate": 0.00019768444389703168,
      "loss": 0.4284,
      "step": 99
    },
    {
      "epoch": 0.03692762186115214,
      "grad_norm": 0.272885262966156,
      "learning_rate": 0.00019765981032146817,
      "loss": 0.3671,
      "step": 100
    },
    {
      "epoch": 0.03692762186115214,
      "eval_loss": 0.419514000415802,
      "eval_runtime": 5.8696,
      "eval_samples_per_second": 8.518,
      "eval_steps_per_second": 1.193,
      "step": 100
    },
    {
      "epoch": 0.03729689807976366,
      "grad_norm": 0.33638831973075867,
      "learning_rate": 0.00019763517674590468,
      "loss": 0.501,
      "step": 101
    },
    {
      "epoch": 0.03766617429837518,
      "grad_norm": 0.2842804789543152,
      "learning_rate": 0.00019761054317034117,
      "loss": 0.3818,
      "step": 102
    },
    {
      "epoch": 0.03803545051698671,
      "grad_norm": 0.36060819029808044,
      "learning_rate": 0.00019758590959477769,
      "loss": 0.389,
      "step": 103
    },
    {
      "epoch": 0.03840472673559823,
      "grad_norm": 0.3600110709667206,
      "learning_rate": 0.0001975612760192142,
      "loss": 0.4301,
      "step": 104
    },
    {
      "epoch": 0.03877400295420975,
      "grad_norm": 0.35801422595977783,
      "learning_rate": 0.00019753664244365071,
      "loss": 0.4298,
      "step": 105
    },
    {
      "epoch": 0.03914327917282127,
      "grad_norm": 0.43014129996299744,
      "learning_rate": 0.0001975120088680872,
      "loss": 0.4976,
      "step": 106
    },
    {
      "epoch": 0.039512555391432794,
      "grad_norm": 0.3420620560646057,
      "learning_rate": 0.00019748737529252372,
      "loss": 0.3749,
      "step": 107
    },
    {
      "epoch": 0.03988183161004431,
      "grad_norm": 0.3296439051628113,
      "learning_rate": 0.00019746274171696023,
      "loss": 0.4317,
      "step": 108
    },
    {
      "epoch": 0.04025110782865583,
      "grad_norm": 0.29001107811927795,
      "learning_rate": 0.00019743810814139675,
      "loss": 0.416,
      "step": 109
    },
    {
      "epoch": 0.04062038404726736,
      "grad_norm": 0.37443405389785767,
      "learning_rate": 0.00019741347456583323,
      "loss": 0.3803,
      "step": 110
    },
    {
      "epoch": 0.04098966026587888,
      "grad_norm": 0.3694715201854706,
      "learning_rate": 0.00019738884099026975,
      "loss": 0.4312,
      "step": 111
    },
    {
      "epoch": 0.0413589364844904,
      "grad_norm": 0.34137406945228577,
      "learning_rate": 0.00019736420741470624,
      "loss": 0.5141,
      "step": 112
    },
    {
      "epoch": 0.04172821270310192,
      "grad_norm": 0.3892885744571686,
      "learning_rate": 0.00019733957383914278,
      "loss": 0.4073,
      "step": 113
    },
    {
      "epoch": 0.042097488921713444,
      "grad_norm": 0.3243370056152344,
      "learning_rate": 0.00019731494026357927,
      "loss": 0.4549,
      "step": 114
    },
    {
      "epoch": 0.042466765140324964,
      "grad_norm": 0.31876978278160095,
      "learning_rate": 0.00019729030668801578,
      "loss": 0.4027,
      "step": 115
    },
    {
      "epoch": 0.04283604135893648,
      "grad_norm": 0.27553582191467285,
      "learning_rate": 0.00019726567311245227,
      "loss": 0.326,
      "step": 116
    },
    {
      "epoch": 0.043205317577548,
      "grad_norm": 0.30736929178237915,
      "learning_rate": 0.00019724103953688878,
      "loss": 0.4841,
      "step": 117
    },
    {
      "epoch": 0.04357459379615953,
      "grad_norm": 0.41499844193458557,
      "learning_rate": 0.0001972164059613253,
      "loss": 0.5411,
      "step": 118
    },
    {
      "epoch": 0.04394387001477105,
      "grad_norm": 0.3472537398338318,
      "learning_rate": 0.0001971917723857618,
      "loss": 0.3876,
      "step": 119
    },
    {
      "epoch": 0.04431314623338257,
      "grad_norm": 0.33110174536705017,
      "learning_rate": 0.0001971671388101983,
      "loss": 0.4174,
      "step": 120
    },
    {
      "epoch": 0.044682422451994094,
      "grad_norm": 0.27718454599380493,
      "learning_rate": 0.0001971425052346348,
      "loss": 0.339,
      "step": 121
    },
    {
      "epoch": 0.045051698670605614,
      "grad_norm": 0.2659473717212677,
      "learning_rate": 0.00019711787165907133,
      "loss": 0.3273,
      "step": 122
    },
    {
      "epoch": 0.04542097488921713,
      "grad_norm": 0.37134623527526855,
      "learning_rate": 0.00019709323808350784,
      "loss": 0.4924,
      "step": 123
    },
    {
      "epoch": 0.04579025110782865,
      "grad_norm": 0.32371461391448975,
      "learning_rate": 0.00019706860450794433,
      "loss": 0.384,
      "step": 124
    },
    {
      "epoch": 0.04615952732644018,
      "grad_norm": 0.31927135586738586,
      "learning_rate": 0.00019704397093238084,
      "loss": 0.4308,
      "step": 125
    },
    {
      "epoch": 0.0465288035450517,
      "grad_norm": 0.3606109619140625,
      "learning_rate": 0.00019701933735681736,
      "loss": 0.392,
      "step": 126
    },
    {
      "epoch": 0.04689807976366322,
      "grad_norm": 0.39174655079841614,
      "learning_rate": 0.00019699470378125387,
      "loss": 0.4944,
      "step": 127
    },
    {
      "epoch": 0.047267355982274745,
      "grad_norm": 0.379129022359848,
      "learning_rate": 0.00019697007020569036,
      "loss": 0.422,
      "step": 128
    },
    {
      "epoch": 0.047636632200886264,
      "grad_norm": 0.31061556935310364,
      "learning_rate": 0.00019694543663012688,
      "loss": 0.4159,
      "step": 129
    },
    {
      "epoch": 0.048005908419497784,
      "grad_norm": 0.35142982006073,
      "learning_rate": 0.00019692080305456336,
      "loss": 0.5237,
      "step": 130
    },
    {
      "epoch": 0.0483751846381093,
      "grad_norm": 0.352500855922699,
      "learning_rate": 0.0001968961694789999,
      "loss": 0.4278,
      "step": 131
    },
    {
      "epoch": 0.04874446085672083,
      "grad_norm": 0.3081618547439575,
      "learning_rate": 0.0001968715359034364,
      "loss": 0.4002,
      "step": 132
    },
    {
      "epoch": 0.04911373707533235,
      "grad_norm": 0.3187482953071594,
      "learning_rate": 0.0001968469023278729,
      "loss": 0.4478,
      "step": 133
    },
    {
      "epoch": 0.04948301329394387,
      "grad_norm": 0.3648607134819031,
      "learning_rate": 0.0001968222687523094,
      "loss": 0.3772,
      "step": 134
    },
    {
      "epoch": 0.04985228951255539,
      "grad_norm": 0.3242417871952057,
      "learning_rate": 0.0001967976351767459,
      "loss": 0.399,
      "step": 135
    },
    {
      "epoch": 0.050221565731166914,
      "grad_norm": 0.3742475211620331,
      "learning_rate": 0.00019677300160118242,
      "loss": 0.5924,
      "step": 136
    },
    {
      "epoch": 0.050590841949778434,
      "grad_norm": 0.3294001817703247,
      "learning_rate": 0.00019674836802561894,
      "loss": 0.4061,
      "step": 137
    },
    {
      "epoch": 0.05096011816838995,
      "grad_norm": 0.2923147976398468,
      "learning_rate": 0.00019672373445005543,
      "loss": 0.3732,
      "step": 138
    },
    {
      "epoch": 0.05132939438700148,
      "grad_norm": 0.28740236163139343,
      "learning_rate": 0.00019669910087449194,
      "loss": 0.3689,
      "step": 139
    },
    {
      "epoch": 0.051698670605613,
      "grad_norm": 0.3307899534702301,
      "learning_rate": 0.00019667446729892846,
      "loss": 0.4061,
      "step": 140
    },
    {
      "epoch": 0.05206794682422452,
      "grad_norm": 0.3527816832065582,
      "learning_rate": 0.00019664983372336497,
      "loss": 0.5354,
      "step": 141
    },
    {
      "epoch": 0.05243722304283604,
      "grad_norm": 0.32473066449165344,
      "learning_rate": 0.00019662520014780146,
      "loss": 0.393,
      "step": 142
    },
    {
      "epoch": 0.052806499261447565,
      "grad_norm": 0.3600977659225464,
      "learning_rate": 0.00019660056657223797,
      "loss": 0.4797,
      "step": 143
    },
    {
      "epoch": 0.053175775480059084,
      "grad_norm": 0.3709307610988617,
      "learning_rate": 0.00019657593299667446,
      "loss": 0.4527,
      "step": 144
    },
    {
      "epoch": 0.053545051698670604,
      "grad_norm": 0.31951725482940674,
      "learning_rate": 0.000196551299421111,
      "loss": 0.4234,
      "step": 145
    },
    {
      "epoch": 0.05391432791728213,
      "grad_norm": 0.34864845871925354,
      "learning_rate": 0.0001965266658455475,
      "loss": 0.4104,
      "step": 146
    },
    {
      "epoch": 0.05428360413589365,
      "grad_norm": 0.3169608414173126,
      "learning_rate": 0.000196502032269984,
      "loss": 0.4348,
      "step": 147
    },
    {
      "epoch": 0.05465288035450517,
      "grad_norm": 0.32839155197143555,
      "learning_rate": 0.0001964773986944205,
      "loss": 0.4174,
      "step": 148
    },
    {
      "epoch": 0.05502215657311669,
      "grad_norm": 0.31056901812553406,
      "learning_rate": 0.000196452765118857,
      "loss": 0.3643,
      "step": 149
    },
    {
      "epoch": 0.055391432791728215,
      "grad_norm": 0.3091226816177368,
      "learning_rate": 0.00019642813154329352,
      "loss": 0.353,
      "step": 150
    },
    {
      "epoch": 0.055391432791728215,
      "eval_loss": 0.4037952721118927,
      "eval_runtime": 5.8759,
      "eval_samples_per_second": 8.509,
      "eval_steps_per_second": 1.191,
      "step": 150
    },
    {
      "epoch": 0.055760709010339735,
      "grad_norm": 0.32927340269088745,
      "learning_rate": 0.00019640349796773004,
      "loss": 0.4333,
      "step": 151
    },
    {
      "epoch": 0.056129985228951254,
      "grad_norm": 0.37403640151023865,
      "learning_rate": 0.00019637886439216652,
      "loss": 0.5012,
      "step": 152
    },
    {
      "epoch": 0.056499261447562774,
      "grad_norm": 0.31805136799812317,
      "learning_rate": 0.00019635423081660304,
      "loss": 0.4309,
      "step": 153
    },
    {
      "epoch": 0.0568685376661743,
      "grad_norm": 0.2776757776737213,
      "learning_rate": 0.00019632959724103955,
      "loss": 0.3624,
      "step": 154
    },
    {
      "epoch": 0.05723781388478582,
      "grad_norm": 0.39969655871391296,
      "learning_rate": 0.00019630496366547607,
      "loss": 0.4735,
      "step": 155
    },
    {
      "epoch": 0.05760709010339734,
      "grad_norm": 0.3075847327709198,
      "learning_rate": 0.00019628033008991255,
      "loss": 0.4294,
      "step": 156
    },
    {
      "epoch": 0.057976366322008865,
      "grad_norm": 0.2900707423686981,
      "learning_rate": 0.00019625569651434907,
      "loss": 0.3997,
      "step": 157
    },
    {
      "epoch": 0.058345642540620385,
      "grad_norm": 0.3196985125541687,
      "learning_rate": 0.00019623106293878556,
      "loss": 0.4385,
      "step": 158
    },
    {
      "epoch": 0.058714918759231904,
      "grad_norm": 0.42510777711868286,
      "learning_rate": 0.0001962064293632221,
      "loss": 0.3615,
      "step": 159
    },
    {
      "epoch": 0.059084194977843424,
      "grad_norm": 0.31159719824790955,
      "learning_rate": 0.00019618179578765859,
      "loss": 0.4185,
      "step": 160
    },
    {
      "epoch": 0.05945347119645495,
      "grad_norm": 0.2744397222995758,
      "learning_rate": 0.0001961571622120951,
      "loss": 0.3571,
      "step": 161
    },
    {
      "epoch": 0.05982274741506647,
      "grad_norm": 0.3381613492965698,
      "learning_rate": 0.0001961325286365316,
      "loss": 0.4154,
      "step": 162
    },
    {
      "epoch": 0.06019202363367799,
      "grad_norm": 0.27710267901420593,
      "learning_rate": 0.00019610789506096813,
      "loss": 0.3451,
      "step": 163
    },
    {
      "epoch": 0.060561299852289516,
      "grad_norm": 0.3598025143146515,
      "learning_rate": 0.00019608326148540462,
      "loss": 0.4224,
      "step": 164
    },
    {
      "epoch": 0.060930576070901035,
      "grad_norm": 0.2834571897983551,
      "learning_rate": 0.00019605862790984113,
      "loss": 0.4527,
      "step": 165
    },
    {
      "epoch": 0.061299852289512555,
      "grad_norm": 0.31147488951683044,
      "learning_rate": 0.00019603399433427762,
      "loss": 0.4013,
      "step": 166
    },
    {
      "epoch": 0.061669128508124074,
      "grad_norm": 0.3192875385284424,
      "learning_rate": 0.00019600936075871413,
      "loss": 0.404,
      "step": 167
    },
    {
      "epoch": 0.0620384047267356,
      "grad_norm": 0.31411129236221313,
      "learning_rate": 0.00019598472718315065,
      "loss": 0.3808,
      "step": 168
    },
    {
      "epoch": 0.06240768094534712,
      "grad_norm": 0.32308077812194824,
      "learning_rate": 0.00019596009360758716,
      "loss": 0.4712,
      "step": 169
    },
    {
      "epoch": 0.06277695716395865,
      "grad_norm": 0.28216615319252014,
      "learning_rate": 0.00019593546003202365,
      "loss": 0.3801,
      "step": 170
    },
    {
      "epoch": 0.06314623338257017,
      "grad_norm": 0.31070226430892944,
      "learning_rate": 0.00019591082645646017,
      "loss": 0.4255,
      "step": 171
    },
    {
      "epoch": 0.06351550960118169,
      "grad_norm": 0.36081427335739136,
      "learning_rate": 0.00019588619288089668,
      "loss": 0.3502,
      "step": 172
    },
    {
      "epoch": 0.0638847858197932,
      "grad_norm": 0.3797888159751892,
      "learning_rate": 0.0001958615593053332,
      "loss": 0.4398,
      "step": 173
    },
    {
      "epoch": 0.06425406203840472,
      "grad_norm": 0.33760133385658264,
      "learning_rate": 0.00019583692572976968,
      "loss": 0.5073,
      "step": 174
    },
    {
      "epoch": 0.06462333825701624,
      "grad_norm": 0.3150223195552826,
      "learning_rate": 0.0001958122921542062,
      "loss": 0.3642,
      "step": 175
    },
    {
      "epoch": 0.06499261447562776,
      "grad_norm": 0.47819983959198,
      "learning_rate": 0.00019578765857864268,
      "loss": 0.4292,
      "step": 176
    },
    {
      "epoch": 0.0653618906942393,
      "grad_norm": 0.28102725744247437,
      "learning_rate": 0.00019576302500307923,
      "loss": 0.3867,
      "step": 177
    },
    {
      "epoch": 0.06573116691285082,
      "grad_norm": 0.357327401638031,
      "learning_rate": 0.00019573839142751571,
      "loss": 0.4362,
      "step": 178
    },
    {
      "epoch": 0.06610044313146234,
      "grad_norm": 0.2614378035068512,
      "learning_rate": 0.00019571375785195223,
      "loss": 0.3398,
      "step": 179
    },
    {
      "epoch": 0.06646971935007386,
      "grad_norm": 0.2837601900100708,
      "learning_rate": 0.00019568912427638872,
      "loss": 0.3158,
      "step": 180
    },
    {
      "epoch": 0.06683899556868537,
      "grad_norm": 0.2811811566352844,
      "learning_rate": 0.00019566449070082523,
      "loss": 0.3756,
      "step": 181
    },
    {
      "epoch": 0.0672082717872969,
      "grad_norm": 0.3223420977592468,
      "learning_rate": 0.00019563985712526175,
      "loss": 0.4041,
      "step": 182
    },
    {
      "epoch": 0.06757754800590841,
      "grad_norm": 0.3521338105201721,
      "learning_rate": 0.00019561522354969826,
      "loss": 0.5336,
      "step": 183
    },
    {
      "epoch": 0.06794682422451995,
      "grad_norm": 0.36033207178115845,
      "learning_rate": 0.00019559058997413475,
      "loss": 0.5104,
      "step": 184
    },
    {
      "epoch": 0.06831610044313147,
      "grad_norm": 0.3409123122692108,
      "learning_rate": 0.00019556595639857126,
      "loss": 0.4852,
      "step": 185
    },
    {
      "epoch": 0.06868537666174299,
      "grad_norm": 0.29176652431488037,
      "learning_rate": 0.00019554132282300778,
      "loss": 0.3441,
      "step": 186
    },
    {
      "epoch": 0.0690546528803545,
      "grad_norm": 0.2858871519565582,
      "learning_rate": 0.0001955166892474443,
      "loss": 0.3342,
      "step": 187
    },
    {
      "epoch": 0.06942392909896603,
      "grad_norm": 0.2597043514251709,
      "learning_rate": 0.00019549205567188078,
      "loss": 0.3095,
      "step": 188
    },
    {
      "epoch": 0.06979320531757754,
      "grad_norm": 0.30069851875305176,
      "learning_rate": 0.0001954674220963173,
      "loss": 0.3474,
      "step": 189
    },
    {
      "epoch": 0.07016248153618906,
      "grad_norm": 0.33916133642196655,
      "learning_rate": 0.00019544278852075378,
      "loss": 0.5525,
      "step": 190
    },
    {
      "epoch": 0.0705317577548006,
      "grad_norm": 0.3795078992843628,
      "learning_rate": 0.00019541815494519032,
      "loss": 0.5012,
      "step": 191
    },
    {
      "epoch": 0.07090103397341212,
      "grad_norm": 0.4375127851963043,
      "learning_rate": 0.0001953935213696268,
      "loss": 0.4266,
      "step": 192
    },
    {
      "epoch": 0.07127031019202364,
      "grad_norm": 0.3799235224723816,
      "learning_rate": 0.00019536888779406333,
      "loss": 0.5288,
      "step": 193
    },
    {
      "epoch": 0.07163958641063516,
      "grad_norm": 0.2929205894470215,
      "learning_rate": 0.0001953442542184998,
      "loss": 0.3563,
      "step": 194
    },
    {
      "epoch": 0.07200886262924668,
      "grad_norm": 0.3236566483974457,
      "learning_rate": 0.00019531962064293633,
      "loss": 0.3988,
      "step": 195
    },
    {
      "epoch": 0.0723781388478582,
      "grad_norm": 0.38911980390548706,
      "learning_rate": 0.00019529498706737284,
      "loss": 0.5104,
      "step": 196
    },
    {
      "epoch": 0.07274741506646971,
      "grad_norm": 0.3128013014793396,
      "learning_rate": 0.00019527035349180936,
      "loss": 0.4066,
      "step": 197
    },
    {
      "epoch": 0.07311669128508123,
      "grad_norm": 0.3556188642978668,
      "learning_rate": 0.00019524571991624584,
      "loss": 0.5096,
      "step": 198
    },
    {
      "epoch": 0.07348596750369277,
      "grad_norm": 0.2989361584186554,
      "learning_rate": 0.00019522108634068236,
      "loss": 0.33,
      "step": 199
    },
    {
      "epoch": 0.07385524372230429,
      "grad_norm": 0.5028315782546997,
      "learning_rate": 0.00019519645276511887,
      "loss": 0.5224,
      "step": 200
    },
    {
      "epoch": 0.07385524372230429,
      "eval_loss": 0.39015138149261475,
      "eval_runtime": 5.896,
      "eval_samples_per_second": 8.48,
      "eval_steps_per_second": 1.187,
      "step": 200
    },
    {
      "epoch": 0.0742245199409158,
      "grad_norm": 0.3487118184566498,
      "learning_rate": 0.0001951718191895554,
      "loss": 0.5037,
      "step": 201
    },
    {
      "epoch": 0.07459379615952733,
      "grad_norm": 0.34047648310661316,
      "learning_rate": 0.00019514718561399188,
      "loss": 0.439,
      "step": 202
    },
    {
      "epoch": 0.07496307237813885,
      "grad_norm": 0.32839900255203247,
      "learning_rate": 0.0001951225520384284,
      "loss": 0.4776,
      "step": 203
    },
    {
      "epoch": 0.07533234859675036,
      "grad_norm": 0.349039763212204,
      "learning_rate": 0.0001950979184628649,
      "loss": 0.4567,
      "step": 204
    },
    {
      "epoch": 0.07570162481536188,
      "grad_norm": 0.37458980083465576,
      "learning_rate": 0.00019507328488730142,
      "loss": 0.4025,
      "step": 205
    },
    {
      "epoch": 0.07607090103397342,
      "grad_norm": 0.32469749450683594,
      "learning_rate": 0.0001950486513117379,
      "loss": 0.4164,
      "step": 206
    },
    {
      "epoch": 0.07644017725258494,
      "grad_norm": 0.259811133146286,
      "learning_rate": 0.00019502401773617442,
      "loss": 0.3229,
      "step": 207
    },
    {
      "epoch": 0.07680945347119646,
      "grad_norm": 0.3223322629928589,
      "learning_rate": 0.0001949993841606109,
      "loss": 0.4247,
      "step": 208
    },
    {
      "epoch": 0.07717872968980798,
      "grad_norm": 0.29984578490257263,
      "learning_rate": 0.00019497475058504745,
      "loss": 0.3751,
      "step": 209
    },
    {
      "epoch": 0.0775480059084195,
      "grad_norm": 0.2897316515445709,
      "learning_rate": 0.00019495011700948394,
      "loss": 0.3907,
      "step": 210
    },
    {
      "epoch": 0.07791728212703102,
      "grad_norm": 0.31401652097702026,
      "learning_rate": 0.00019492548343392045,
      "loss": 0.42,
      "step": 211
    },
    {
      "epoch": 0.07828655834564253,
      "grad_norm": 0.29450908303260803,
      "learning_rate": 0.00019490084985835694,
      "loss": 0.4077,
      "step": 212
    },
    {
      "epoch": 0.07865583456425407,
      "grad_norm": 0.2941333055496216,
      "learning_rate": 0.00019487621628279346,
      "loss": 0.3752,
      "step": 213
    },
    {
      "epoch": 0.07902511078286559,
      "grad_norm": 0.3410256505012512,
      "learning_rate": 0.00019485158270722997,
      "loss": 0.4759,
      "step": 214
    },
    {
      "epoch": 0.0793943870014771,
      "grad_norm": 0.35839465260505676,
      "learning_rate": 0.00019482694913166648,
      "loss": 0.4277,
      "step": 215
    },
    {
      "epoch": 0.07976366322008863,
      "grad_norm": 0.3427143394947052,
      "learning_rate": 0.00019480231555610297,
      "loss": 0.4251,
      "step": 216
    },
    {
      "epoch": 0.08013293943870015,
      "grad_norm": 0.27888450026512146,
      "learning_rate": 0.0001947776819805395,
      "loss": 0.3859,
      "step": 217
    },
    {
      "epoch": 0.08050221565731167,
      "grad_norm": 0.36770373582839966,
      "learning_rate": 0.000194753048404976,
      "loss": 0.459,
      "step": 218
    },
    {
      "epoch": 0.08087149187592318,
      "grad_norm": 0.29189980030059814,
      "learning_rate": 0.00019472841482941252,
      "loss": 0.3102,
      "step": 219
    },
    {
      "epoch": 0.08124076809453472,
      "grad_norm": 0.3150429427623749,
      "learning_rate": 0.000194703781253849,
      "loss": 0.401,
      "step": 220
    },
    {
      "epoch": 0.08161004431314624,
      "grad_norm": 0.3211479187011719,
      "learning_rate": 0.00019467914767828552,
      "loss": 0.421,
      "step": 221
    },
    {
      "epoch": 0.08197932053175776,
      "grad_norm": 0.26583531498908997,
      "learning_rate": 0.000194654514102722,
      "loss": 0.388,
      "step": 222
    },
    {
      "epoch": 0.08234859675036928,
      "grad_norm": 0.321421355009079,
      "learning_rate": 0.00019462988052715855,
      "loss": 0.4625,
      "step": 223
    },
    {
      "epoch": 0.0827178729689808,
      "grad_norm": 0.31825941801071167,
      "learning_rate": 0.00019460524695159504,
      "loss": 0.4025,
      "step": 224
    },
    {
      "epoch": 0.08308714918759232,
      "grad_norm": 0.31772172451019287,
      "learning_rate": 0.00019458061337603155,
      "loss": 0.436,
      "step": 225
    },
    {
      "epoch": 0.08345642540620384,
      "grad_norm": 0.2731233835220337,
      "learning_rate": 0.00019455597980046804,
      "loss": 0.3841,
      "step": 226
    },
    {
      "epoch": 0.08382570162481537,
      "grad_norm": 0.28971999883651733,
      "learning_rate": 0.00019453134622490455,
      "loss": 0.3601,
      "step": 227
    },
    {
      "epoch": 0.08419497784342689,
      "grad_norm": 0.25201430916786194,
      "learning_rate": 0.00019450671264934107,
      "loss": 0.2993,
      "step": 228
    },
    {
      "epoch": 0.08456425406203841,
      "grad_norm": 0.31219369173049927,
      "learning_rate": 0.00019448207907377758,
      "loss": 0.3776,
      "step": 229
    },
    {
      "epoch": 0.08493353028064993,
      "grad_norm": 0.379317969083786,
      "learning_rate": 0.00019445744549821407,
      "loss": 0.5589,
      "step": 230
    },
    {
      "epoch": 0.08530280649926145,
      "grad_norm": 0.311305433511734,
      "learning_rate": 0.00019443281192265058,
      "loss": 0.4364,
      "step": 231
    },
    {
      "epoch": 0.08567208271787297,
      "grad_norm": 0.32585617899894714,
      "learning_rate": 0.0001944081783470871,
      "loss": 0.4224,
      "step": 232
    },
    {
      "epoch": 0.08604135893648449,
      "grad_norm": 0.26801955699920654,
      "learning_rate": 0.0001943835447715236,
      "loss": 0.3569,
      "step": 233
    },
    {
      "epoch": 0.086410635155096,
      "grad_norm": 0.3283174932003021,
      "learning_rate": 0.0001943589111959601,
      "loss": 0.4421,
      "step": 234
    },
    {
      "epoch": 0.08677991137370754,
      "grad_norm": 0.3212074935436249,
      "learning_rate": 0.00019433427762039661,
      "loss": 0.4274,
      "step": 235
    },
    {
      "epoch": 0.08714918759231906,
      "grad_norm": 0.3012539744377136,
      "learning_rate": 0.00019430964404483313,
      "loss": 0.3998,
      "step": 236
    },
    {
      "epoch": 0.08751846381093058,
      "grad_norm": 0.31821128726005554,
      "learning_rate": 0.00019428501046926962,
      "loss": 0.3862,
      "step": 237
    },
    {
      "epoch": 0.0878877400295421,
      "grad_norm": 0.3257669508457184,
      "learning_rate": 0.00019426037689370613,
      "loss": 0.5178,
      "step": 238
    },
    {
      "epoch": 0.08825701624815362,
      "grad_norm": 0.3112789988517761,
      "learning_rate": 0.00019423574331814262,
      "loss": 0.4182,
      "step": 239
    },
    {
      "epoch": 0.08862629246676514,
      "grad_norm": 0.25881633162498474,
      "learning_rate": 0.00019421110974257913,
      "loss": 0.3079,
      "step": 240
    },
    {
      "epoch": 0.08899556868537666,
      "grad_norm": 0.31627917289733887,
      "learning_rate": 0.00019418647616701565,
      "loss": 0.461,
      "step": 241
    },
    {
      "epoch": 0.08936484490398819,
      "grad_norm": 0.3195187449455261,
      "learning_rate": 0.00019416184259145216,
      "loss": 0.4439,
      "step": 242
    },
    {
      "epoch": 0.08973412112259971,
      "grad_norm": 0.3452574908733368,
      "learning_rate": 0.00019413720901588865,
      "loss": 0.4799,
      "step": 243
    },
    {
      "epoch": 0.09010339734121123,
      "grad_norm": 0.336542546749115,
      "learning_rate": 0.00019411257544032517,
      "loss": 0.4815,
      "step": 244
    },
    {
      "epoch": 0.09047267355982275,
      "grad_norm": 0.31545954942703247,
      "learning_rate": 0.00019408794186476168,
      "loss": 0.4452,
      "step": 245
    },
    {
      "epoch": 0.09084194977843427,
      "grad_norm": 0.3060772716999054,
      "learning_rate": 0.0001940633082891982,
      "loss": 0.4713,
      "step": 246
    },
    {
      "epoch": 0.09121122599704579,
      "grad_norm": 0.3096682131290436,
      "learning_rate": 0.00019403867471363468,
      "loss": 0.4104,
      "step": 247
    },
    {
      "epoch": 0.0915805022156573,
      "grad_norm": 0.2977633476257324,
      "learning_rate": 0.0001940140411380712,
      "loss": 0.4928,
      "step": 248
    },
    {
      "epoch": 0.09194977843426884,
      "grad_norm": 0.2890436053276062,
      "learning_rate": 0.00019398940756250768,
      "loss": 0.4116,
      "step": 249
    },
    {
      "epoch": 0.09231905465288036,
      "grad_norm": 0.2783840596675873,
      "learning_rate": 0.00019396477398694423,
      "loss": 0.3643,
      "step": 250
    },
    {
      "epoch": 0.09231905465288036,
      "eval_loss": 0.3816169500350952,
      "eval_runtime": 5.8658,
      "eval_samples_per_second": 8.524,
      "eval_steps_per_second": 1.193,
      "step": 250
    },
    {
      "epoch": 0.09268833087149188,
      "grad_norm": 0.3246957063674927,
      "learning_rate": 0.00019394014041138071,
      "loss": 0.477,
      "step": 251
    },
    {
      "epoch": 0.0930576070901034,
      "grad_norm": 0.3633597493171692,
      "learning_rate": 0.00019391550683581723,
      "loss": 0.4847,
      "step": 252
    },
    {
      "epoch": 0.09342688330871492,
      "grad_norm": 0.3402022421360016,
      "learning_rate": 0.00019389087326025372,
      "loss": 0.4732,
      "step": 253
    },
    {
      "epoch": 0.09379615952732644,
      "grad_norm": 0.40175941586494446,
      "learning_rate": 0.00019386623968469023,
      "loss": 0.3792,
      "step": 254
    },
    {
      "epoch": 0.09416543574593796,
      "grad_norm": 0.3117552101612091,
      "learning_rate": 0.00019384160610912675,
      "loss": 0.381,
      "step": 255
    },
    {
      "epoch": 0.09453471196454949,
      "grad_norm": 0.26914849877357483,
      "learning_rate": 0.00019381697253356326,
      "loss": 0.3325,
      "step": 256
    },
    {
      "epoch": 0.09490398818316101,
      "grad_norm": 0.3286375403404236,
      "learning_rate": 0.00019379233895799975,
      "loss": 0.4223,
      "step": 257
    },
    {
      "epoch": 0.09527326440177253,
      "grad_norm": 0.31570112705230713,
      "learning_rate": 0.00019376770538243626,
      "loss": 0.4549,
      "step": 258
    },
    {
      "epoch": 0.09564254062038405,
      "grad_norm": 0.3004911541938782,
      "learning_rate": 0.00019374307180687278,
      "loss": 0.422,
      "step": 259
    },
    {
      "epoch": 0.09601181683899557,
      "grad_norm": 0.316474974155426,
      "learning_rate": 0.0001937184382313093,
      "loss": 0.451,
      "step": 260
    },
    {
      "epoch": 0.09638109305760709,
      "grad_norm": 0.3089964687824249,
      "learning_rate": 0.00019369380465574578,
      "loss": 0.426,
      "step": 261
    },
    {
      "epoch": 0.0967503692762186,
      "grad_norm": 0.28488385677337646,
      "learning_rate": 0.0001936691710801823,
      "loss": 0.3753,
      "step": 262
    },
    {
      "epoch": 0.09711964549483014,
      "grad_norm": 0.27882590889930725,
      "learning_rate": 0.0001936445375046188,
      "loss": 0.3279,
      "step": 263
    },
    {
      "epoch": 0.09748892171344166,
      "grad_norm": 0.3286533057689667,
      "learning_rate": 0.00019361990392905532,
      "loss": 0.4219,
      "step": 264
    },
    {
      "epoch": 0.09785819793205318,
      "grad_norm": 0.3470388948917389,
      "learning_rate": 0.0001935952703534918,
      "loss": 0.4469,
      "step": 265
    },
    {
      "epoch": 0.0982274741506647,
      "grad_norm": 0.2946823537349701,
      "learning_rate": 0.00019357063677792832,
      "loss": 0.4202,
      "step": 266
    },
    {
      "epoch": 0.09859675036927622,
      "grad_norm": 0.35018935799598694,
      "learning_rate": 0.0001935460032023648,
      "loss": 0.4219,
      "step": 267
    },
    {
      "epoch": 0.09896602658788774,
      "grad_norm": 0.3789230287075043,
      "learning_rate": 0.00019352136962680135,
      "loss": 0.4787,
      "step": 268
    },
    {
      "epoch": 0.09933530280649926,
      "grad_norm": 0.26236382126808167,
      "learning_rate": 0.00019349673605123784,
      "loss": 0.3258,
      "step": 269
    },
    {
      "epoch": 0.09970457902511078,
      "grad_norm": 0.3044803738594055,
      "learning_rate": 0.00019347210247567436,
      "loss": 0.4202,
      "step": 270
    },
    {
      "epoch": 0.10007385524372231,
      "grad_norm": 0.34376615285873413,
      "learning_rate": 0.00019344746890011084,
      "loss": 0.3709,
      "step": 271
    },
    {
      "epoch": 0.10044313146233383,
      "grad_norm": 0.2787488102912903,
      "learning_rate": 0.00019342283532454736,
      "loss": 0.3019,
      "step": 272
    },
    {
      "epoch": 0.10081240768094535,
      "grad_norm": 0.2931845784187317,
      "learning_rate": 0.00019339820174898387,
      "loss": 0.3439,
      "step": 273
    },
    {
      "epoch": 0.10118168389955687,
      "grad_norm": 0.3124999701976776,
      "learning_rate": 0.0001933735681734204,
      "loss": 0.3801,
      "step": 274
    },
    {
      "epoch": 0.10155096011816839,
      "grad_norm": 0.34231653809547424,
      "learning_rate": 0.00019334893459785688,
      "loss": 0.4276,
      "step": 275
    },
    {
      "epoch": 0.1019202363367799,
      "grad_norm": 0.26137593388557434,
      "learning_rate": 0.0001933243010222934,
      "loss": 0.3719,
      "step": 276
    },
    {
      "epoch": 0.10228951255539143,
      "grad_norm": 0.30112504959106445,
      "learning_rate": 0.0001932996674467299,
      "loss": 0.4047,
      "step": 277
    },
    {
      "epoch": 0.10265878877400296,
      "grad_norm": 0.2984439730644226,
      "learning_rate": 0.00019327503387116642,
      "loss": 0.3506,
      "step": 278
    },
    {
      "epoch": 0.10302806499261448,
      "grad_norm": 0.3731374144554138,
      "learning_rate": 0.0001932504002956029,
      "loss": 0.439,
      "step": 279
    },
    {
      "epoch": 0.103397341211226,
      "grad_norm": 0.29310372471809387,
      "learning_rate": 0.00019322576672003942,
      "loss": 0.3963,
      "step": 280
    },
    {
      "epoch": 0.10376661742983752,
      "grad_norm": 0.3897750973701477,
      "learning_rate": 0.0001932011331444759,
      "loss": 0.4959,
      "step": 281
    },
    {
      "epoch": 0.10413589364844904,
      "grad_norm": 0.29394257068634033,
      "learning_rate": 0.00019317649956891245,
      "loss": 0.3666,
      "step": 282
    },
    {
      "epoch": 0.10450516986706056,
      "grad_norm": 0.3201434314250946,
      "learning_rate": 0.00019315186599334894,
      "loss": 0.3513,
      "step": 283
    },
    {
      "epoch": 0.10487444608567208,
      "grad_norm": 0.2828335165977478,
      "learning_rate": 0.00019312723241778545,
      "loss": 0.3175,
      "step": 284
    },
    {
      "epoch": 0.10524372230428361,
      "grad_norm": 0.2940762937068939,
      "learning_rate": 0.00019310259884222194,
      "loss": 0.3655,
      "step": 285
    },
    {
      "epoch": 0.10561299852289513,
      "grad_norm": 0.3026212751865387,
      "learning_rate": 0.00019307796526665845,
      "loss": 0.3552,
      "step": 286
    },
    {
      "epoch": 0.10598227474150665,
      "grad_norm": 0.3048368990421295,
      "learning_rate": 0.00019305333169109497,
      "loss": 0.4266,
      "step": 287
    },
    {
      "epoch": 0.10635155096011817,
      "grad_norm": 0.28596359491348267,
      "learning_rate": 0.00019302869811553148,
      "loss": 0.3745,
      "step": 288
    },
    {
      "epoch": 0.10672082717872969,
      "grad_norm": 0.46713870763778687,
      "learning_rate": 0.00019300406453996797,
      "loss": 0.5193,
      "step": 289
    },
    {
      "epoch": 0.10709010339734121,
      "grad_norm": 0.3674304485321045,
      "learning_rate": 0.00019297943096440449,
      "loss": 0.4062,
      "step": 290
    },
    {
      "epoch": 0.10745937961595273,
      "grad_norm": 0.3007575571537018,
      "learning_rate": 0.000192954797388841,
      "loss": 0.3723,
      "step": 291
    },
    {
      "epoch": 0.10782865583456426,
      "grad_norm": 0.3332579433917999,
      "learning_rate": 0.00019293016381327752,
      "loss": 0.3609,
      "step": 292
    },
    {
      "epoch": 0.10819793205317578,
      "grad_norm": 0.31988173723220825,
      "learning_rate": 0.000192905530237714,
      "loss": 0.4013,
      "step": 293
    },
    {
      "epoch": 0.1085672082717873,
      "grad_norm": 0.3104618787765503,
      "learning_rate": 0.00019288089666215052,
      "loss": 0.3623,
      "step": 294
    },
    {
      "epoch": 0.10893648449039882,
      "grad_norm": 0.35320624709129333,
      "learning_rate": 0.00019285626308658703,
      "loss": 0.2864,
      "step": 295
    },
    {
      "epoch": 0.10930576070901034,
      "grad_norm": 0.24095280468463898,
      "learning_rate": 0.00019283162951102355,
      "loss": 0.288,
      "step": 296
    },
    {
      "epoch": 0.10967503692762186,
      "grad_norm": 0.34680652618408203,
      "learning_rate": 0.00019280699593546003,
      "loss": 0.4466,
      "step": 297
    },
    {
      "epoch": 0.11004431314623338,
      "grad_norm": 0.31190916895866394,
      "learning_rate": 0.00019278236235989655,
      "loss": 0.3376,
      "step": 298
    },
    {
      "epoch": 0.11041358936484491,
      "grad_norm": 0.316723108291626,
      "learning_rate": 0.00019275772878433304,
      "loss": 0.4236,
      "step": 299
    },
    {
      "epoch": 0.11078286558345643,
      "grad_norm": 0.2838907837867737,
      "learning_rate": 0.00019273309520876958,
      "loss": 0.3508,
      "step": 300
    },
    {
      "epoch": 0.11078286558345643,
      "eval_loss": 0.3670826852321625,
      "eval_runtime": 5.8544,
      "eval_samples_per_second": 8.541,
      "eval_steps_per_second": 1.196,
      "step": 300
    },
    {
      "epoch": 0.11115214180206795,
      "grad_norm": 0.3103469908237457,
      "learning_rate": 0.00019270846163320607,
      "loss": 0.4048,
      "step": 301
    },
    {
      "epoch": 0.11152141802067947,
      "grad_norm": 0.3298850655555725,
      "learning_rate": 0.00019268382805764258,
      "loss": 0.4088,
      "step": 302
    },
    {
      "epoch": 0.11189069423929099,
      "grad_norm": 0.2599942684173584,
      "learning_rate": 0.00019265919448207907,
      "loss": 0.3307,
      "step": 303
    },
    {
      "epoch": 0.11225997045790251,
      "grad_norm": 0.36178743839263916,
      "learning_rate": 0.00019263456090651558,
      "loss": 0.3637,
      "step": 304
    },
    {
      "epoch": 0.11262924667651403,
      "grad_norm": 0.38140785694122314,
      "learning_rate": 0.0001926099273309521,
      "loss": 0.5136,
      "step": 305
    },
    {
      "epoch": 0.11299852289512555,
      "grad_norm": 0.3953641355037689,
      "learning_rate": 0.0001925852937553886,
      "loss": 0.4494,
      "step": 306
    },
    {
      "epoch": 0.11336779911373708,
      "grad_norm": 0.34380874037742615,
      "learning_rate": 0.0001925606601798251,
      "loss": 0.3824,
      "step": 307
    },
    {
      "epoch": 0.1137370753323486,
      "grad_norm": 0.29729288816452026,
      "learning_rate": 0.00019253602660426161,
      "loss": 0.3685,
      "step": 308
    },
    {
      "epoch": 0.11410635155096012,
      "grad_norm": 0.7438095211982727,
      "learning_rate": 0.00019251139302869813,
      "loss": 0.3982,
      "step": 309
    },
    {
      "epoch": 0.11447562776957164,
      "grad_norm": 0.3309866487979889,
      "learning_rate": 0.00019248675945313464,
      "loss": 0.4468,
      "step": 310
    },
    {
      "epoch": 0.11484490398818316,
      "grad_norm": 0.26817625761032104,
      "learning_rate": 0.00019246212587757113,
      "loss": 0.3913,
      "step": 311
    },
    {
      "epoch": 0.11521418020679468,
      "grad_norm": 0.31022658944129944,
      "learning_rate": 0.00019243749230200765,
      "loss": 0.4418,
      "step": 312
    },
    {
      "epoch": 0.1155834564254062,
      "grad_norm": 0.32103851437568665,
      "learning_rate": 0.00019241285872644413,
      "loss": 0.4338,
      "step": 313
    },
    {
      "epoch": 0.11595273264401773,
      "grad_norm": 0.25964781641960144,
      "learning_rate": 0.00019238822515088068,
      "loss": 0.2941,
      "step": 314
    },
    {
      "epoch": 0.11632200886262925,
      "grad_norm": 0.3442859351634979,
      "learning_rate": 0.00019236359157531716,
      "loss": 0.4596,
      "step": 315
    },
    {
      "epoch": 0.11669128508124077,
      "grad_norm": 0.3221491873264313,
      "learning_rate": 0.00019233895799975368,
      "loss": 0.4285,
      "step": 316
    },
    {
      "epoch": 0.11706056129985229,
      "grad_norm": 0.32146963477134705,
      "learning_rate": 0.00019231432442419016,
      "loss": 0.3989,
      "step": 317
    },
    {
      "epoch": 0.11742983751846381,
      "grad_norm": 0.3061736524105072,
      "learning_rate": 0.00019228969084862668,
      "loss": 0.3835,
      "step": 318
    },
    {
      "epoch": 0.11779911373707533,
      "grad_norm": 0.3913908302783966,
      "learning_rate": 0.0001922650572730632,
      "loss": 0.4835,
      "step": 319
    },
    {
      "epoch": 0.11816838995568685,
      "grad_norm": 0.31400611996650696,
      "learning_rate": 0.0001922404236974997,
      "loss": 0.4233,
      "step": 320
    },
    {
      "epoch": 0.11853766617429838,
      "grad_norm": 0.36072105169296265,
      "learning_rate": 0.0001922157901219362,
      "loss": 0.4879,
      "step": 321
    },
    {
      "epoch": 0.1189069423929099,
      "grad_norm": 0.3598588705062866,
      "learning_rate": 0.0001921911565463727,
      "loss": 0.4645,
      "step": 322
    },
    {
      "epoch": 0.11927621861152142,
      "grad_norm": 0.3355506956577301,
      "learning_rate": 0.00019216652297080923,
      "loss": 0.3992,
      "step": 323
    },
    {
      "epoch": 0.11964549483013294,
      "grad_norm": 0.2957211136817932,
      "learning_rate": 0.00019214188939524574,
      "loss": 0.3671,
      "step": 324
    },
    {
      "epoch": 0.12001477104874446,
      "grad_norm": 0.29227033257484436,
      "learning_rate": 0.00019211725581968223,
      "loss": 0.3793,
      "step": 325
    },
    {
      "epoch": 0.12038404726735598,
      "grad_norm": 0.2803254723548889,
      "learning_rate": 0.00019209262224411874,
      "loss": 0.3603,
      "step": 326
    },
    {
      "epoch": 0.1207533234859675,
      "grad_norm": 0.5801281332969666,
      "learning_rate": 0.00019206798866855523,
      "loss": 0.5687,
      "step": 327
    },
    {
      "epoch": 0.12112259970457903,
      "grad_norm": 0.31717267632484436,
      "learning_rate": 0.00019204335509299177,
      "loss": 0.3732,
      "step": 328
    },
    {
      "epoch": 0.12149187592319055,
      "grad_norm": 0.33161666989326477,
      "learning_rate": 0.00019201872151742826,
      "loss": 0.3671,
      "step": 329
    },
    {
      "epoch": 0.12186115214180207,
      "grad_norm": 0.32501599192619324,
      "learning_rate": 0.00019199408794186477,
      "loss": 0.4167,
      "step": 330
    },
    {
      "epoch": 0.12223042836041359,
      "grad_norm": 0.309461385011673,
      "learning_rate": 0.00019196945436630126,
      "loss": 0.3919,
      "step": 331
    },
    {
      "epoch": 0.12259970457902511,
      "grad_norm": 0.29599183797836304,
      "learning_rate": 0.00019194482079073778,
      "loss": 0.4568,
      "step": 332
    },
    {
      "epoch": 0.12296898079763663,
      "grad_norm": 0.38673701882362366,
      "learning_rate": 0.0001919201872151743,
      "loss": 0.4347,
      "step": 333
    },
    {
      "epoch": 0.12333825701624815,
      "grad_norm": 0.3297078013420105,
      "learning_rate": 0.0001918955536396108,
      "loss": 0.4622,
      "step": 334
    },
    {
      "epoch": 0.12370753323485968,
      "grad_norm": 0.3321128487586975,
      "learning_rate": 0.0001918709200640473,
      "loss": 0.4433,
      "step": 335
    },
    {
      "epoch": 0.1240768094534712,
      "grad_norm": 0.2586759030818939,
      "learning_rate": 0.0001918462864884838,
      "loss": 0.3136,
      "step": 336
    },
    {
      "epoch": 0.12444608567208272,
      "grad_norm": 0.24902012944221497,
      "learning_rate": 0.00019182165291292032,
      "loss": 0.3435,
      "step": 337
    },
    {
      "epoch": 0.12481536189069424,
      "grad_norm": 0.4221639931201935,
      "learning_rate": 0.00019179701933735684,
      "loss": 0.3849,
      "step": 338
    },
    {
      "epoch": 0.12518463810930577,
      "grad_norm": 0.33962059020996094,
      "learning_rate": 0.00019177238576179332,
      "loss": 0.3935,
      "step": 339
    },
    {
      "epoch": 0.1255539143279173,
      "grad_norm": 0.35686731338500977,
      "learning_rate": 0.00019174775218622984,
      "loss": 0.4114,
      "step": 340
    },
    {
      "epoch": 0.1259231905465288,
      "grad_norm": 0.3300132155418396,
      "learning_rate": 0.00019172311861066635,
      "loss": 0.4289,
      "step": 341
    },
    {
      "epoch": 0.12629246676514033,
      "grad_norm": 0.30660581588745117,
      "learning_rate": 0.00019169848503510287,
      "loss": 0.3981,
      "step": 342
    },
    {
      "epoch": 0.12666174298375185,
      "grad_norm": 0.2764551639556885,
      "learning_rate": 0.00019167385145953936,
      "loss": 0.3313,
      "step": 343
    },
    {
      "epoch": 0.12703101920236337,
      "grad_norm": 0.33090174198150635,
      "learning_rate": 0.00019164921788397587,
      "loss": 0.3681,
      "step": 344
    },
    {
      "epoch": 0.1274002954209749,
      "grad_norm": 0.3257863521575928,
      "learning_rate": 0.00019162458430841236,
      "loss": 0.4006,
      "step": 345
    },
    {
      "epoch": 0.1277695716395864,
      "grad_norm": 0.2760215699672699,
      "learning_rate": 0.0001915999507328489,
      "loss": 0.3531,
      "step": 346
    },
    {
      "epoch": 0.12813884785819793,
      "grad_norm": 0.27463459968566895,
      "learning_rate": 0.0001915753171572854,
      "loss": 0.3362,
      "step": 347
    },
    {
      "epoch": 0.12850812407680945,
      "grad_norm": 0.2651127576828003,
      "learning_rate": 0.0001915506835817219,
      "loss": 0.3311,
      "step": 348
    },
    {
      "epoch": 0.12887740029542097,
      "grad_norm": 0.26319748163223267,
      "learning_rate": 0.0001915260500061584,
      "loss": 0.3812,
      "step": 349
    },
    {
      "epoch": 0.1292466765140325,
      "grad_norm": 0.3255946934223175,
      "learning_rate": 0.0001915014164305949,
      "loss": 0.3957,
      "step": 350
    },
    {
      "epoch": 0.1292466765140325,
      "eval_loss": 0.3631550669670105,
      "eval_runtime": 5.8601,
      "eval_samples_per_second": 8.532,
      "eval_steps_per_second": 1.195,
      "step": 350
    },
    {
      "epoch": 0.129615952732644,
      "grad_norm": 0.8458616733551025,
      "learning_rate": 0.00019147678285503142,
      "loss": 0.5132,
      "step": 351
    },
    {
      "epoch": 0.12998522895125553,
      "grad_norm": 0.2686603367328644,
      "learning_rate": 0.00019145214927946793,
      "loss": 0.2914,
      "step": 352
    },
    {
      "epoch": 0.13035450516986707,
      "grad_norm": 0.3185257911682129,
      "learning_rate": 0.00019142751570390442,
      "loss": 0.3779,
      "step": 353
    },
    {
      "epoch": 0.1307237813884786,
      "grad_norm": 0.40341976284980774,
      "learning_rate": 0.00019140288212834094,
      "loss": 0.4262,
      "step": 354
    },
    {
      "epoch": 0.1310930576070901,
      "grad_norm": 0.287338525056839,
      "learning_rate": 0.00019137824855277745,
      "loss": 0.3584,
      "step": 355
    },
    {
      "epoch": 0.13146233382570163,
      "grad_norm": 0.3099704384803772,
      "learning_rate": 0.00019135361497721396,
      "loss": 0.391,
      "step": 356
    },
    {
      "epoch": 0.13183161004431315,
      "grad_norm": 0.29361316561698914,
      "learning_rate": 0.00019132898140165045,
      "loss": 0.3941,
      "step": 357
    },
    {
      "epoch": 0.13220088626292467,
      "grad_norm": 0.3063497543334961,
      "learning_rate": 0.00019130434782608697,
      "loss": 0.3711,
      "step": 358
    },
    {
      "epoch": 0.1325701624815362,
      "grad_norm": 0.33277326822280884,
      "learning_rate": 0.00019127971425052345,
      "loss": 0.4204,
      "step": 359
    },
    {
      "epoch": 0.1329394387001477,
      "grad_norm": 0.2999245822429657,
      "learning_rate": 0.00019125508067496,
      "loss": 0.4316,
      "step": 360
    },
    {
      "epoch": 0.13330871491875923,
      "grad_norm": 0.3451574444770813,
      "learning_rate": 0.00019123044709939648,
      "loss": 0.4635,
      "step": 361
    },
    {
      "epoch": 0.13367799113737075,
      "grad_norm": 0.31505754590034485,
      "learning_rate": 0.000191205813523833,
      "loss": 0.424,
      "step": 362
    },
    {
      "epoch": 0.13404726735598227,
      "grad_norm": 0.3461337089538574,
      "learning_rate": 0.00019118117994826949,
      "loss": 0.413,
      "step": 363
    },
    {
      "epoch": 0.1344165435745938,
      "grad_norm": 0.28650403022766113,
      "learning_rate": 0.000191156546372706,
      "loss": 0.3617,
      "step": 364
    },
    {
      "epoch": 0.1347858197932053,
      "grad_norm": 0.3007316291332245,
      "learning_rate": 0.00019113191279714252,
      "loss": 0.3575,
      "step": 365
    },
    {
      "epoch": 0.13515509601181683,
      "grad_norm": 0.2797721028327942,
      "learning_rate": 0.00019110727922157903,
      "loss": 0.3361,
      "step": 366
    },
    {
      "epoch": 0.13552437223042835,
      "grad_norm": 0.30158933997154236,
      "learning_rate": 0.00019108264564601552,
      "loss": 0.3612,
      "step": 367
    },
    {
      "epoch": 0.1358936484490399,
      "grad_norm": 0.3466665744781494,
      "learning_rate": 0.00019105801207045203,
      "loss": 0.3723,
      "step": 368
    },
    {
      "epoch": 0.1362629246676514,
      "grad_norm": 0.293473482131958,
      "learning_rate": 0.00019103337849488855,
      "loss": 0.3841,
      "step": 369
    },
    {
      "epoch": 0.13663220088626293,
      "grad_norm": 0.39320608973503113,
      "learning_rate": 0.00019100874491932506,
      "loss": 0.4726,
      "step": 370
    },
    {
      "epoch": 0.13700147710487445,
      "grad_norm": 0.30709806084632874,
      "learning_rate": 0.00019098411134376155,
      "loss": 0.4062,
      "step": 371
    },
    {
      "epoch": 0.13737075332348597,
      "grad_norm": 0.3337753713130951,
      "learning_rate": 0.00019095947776819806,
      "loss": 0.3259,
      "step": 372
    },
    {
      "epoch": 0.1377400295420975,
      "grad_norm": 0.263394296169281,
      "learning_rate": 0.00019093484419263458,
      "loss": 0.3291,
      "step": 373
    },
    {
      "epoch": 0.138109305760709,
      "grad_norm": 0.2679811716079712,
      "learning_rate": 0.0001909102106170711,
      "loss": 0.3157,
      "step": 374
    },
    {
      "epoch": 0.13847858197932053,
      "grad_norm": 0.3496569097042084,
      "learning_rate": 0.00019088557704150758,
      "loss": 0.4116,
      "step": 375
    },
    {
      "epoch": 0.13884785819793205,
      "grad_norm": 0.36639106273651123,
      "learning_rate": 0.0001908609434659441,
      "loss": 0.4704,
      "step": 376
    },
    {
      "epoch": 0.13921713441654357,
      "grad_norm": 0.33925941586494446,
      "learning_rate": 0.00019083630989038058,
      "loss": 0.4368,
      "step": 377
    },
    {
      "epoch": 0.1395864106351551,
      "grad_norm": 0.3684757947921753,
      "learning_rate": 0.00019081167631481712,
      "loss": 0.3754,
      "step": 378
    },
    {
      "epoch": 0.1399556868537666,
      "grad_norm": 0.28193071484565735,
      "learning_rate": 0.0001907870427392536,
      "loss": 0.3299,
      "step": 379
    },
    {
      "epoch": 0.14032496307237813,
      "grad_norm": 0.2718585133552551,
      "learning_rate": 0.00019076240916369013,
      "loss": 0.3369,
      "step": 380
    },
    {
      "epoch": 0.14069423929098965,
      "grad_norm": 0.2778518795967102,
      "learning_rate": 0.00019073777558812661,
      "loss": 0.3625,
      "step": 381
    },
    {
      "epoch": 0.1410635155096012,
      "grad_norm": 0.29346945881843567,
      "learning_rate": 0.00019071314201256313,
      "loss": 0.3746,
      "step": 382
    },
    {
      "epoch": 0.14143279172821271,
      "grad_norm": 0.33248963952064514,
      "learning_rate": 0.00019068850843699964,
      "loss": 0.3983,
      "step": 383
    },
    {
      "epoch": 0.14180206794682423,
      "grad_norm": 0.32002905011177063,
      "learning_rate": 0.00019066387486143616,
      "loss": 0.4637,
      "step": 384
    },
    {
      "epoch": 0.14217134416543575,
      "grad_norm": 0.2967303693294525,
      "learning_rate": 0.00019063924128587265,
      "loss": 0.3241,
      "step": 385
    },
    {
      "epoch": 0.14254062038404727,
      "grad_norm": 0.3041168749332428,
      "learning_rate": 0.00019061460771030916,
      "loss": 0.389,
      "step": 386
    },
    {
      "epoch": 0.1429098966026588,
      "grad_norm": 0.2852483093738556,
      "learning_rate": 0.00019058997413474567,
      "loss": 0.3429,
      "step": 387
    },
    {
      "epoch": 0.1432791728212703,
      "grad_norm": 0.34776344895362854,
      "learning_rate": 0.0001905653405591822,
      "loss": 0.4762,
      "step": 388
    },
    {
      "epoch": 0.14364844903988183,
      "grad_norm": 0.3596150279045105,
      "learning_rate": 0.00019054070698361868,
      "loss": 0.4183,
      "step": 389
    },
    {
      "epoch": 0.14401772525849335,
      "grad_norm": 0.2584787905216217,
      "learning_rate": 0.0001905160734080552,
      "loss": 0.3065,
      "step": 390
    },
    {
      "epoch": 0.14438700147710487,
      "grad_norm": 0.3181762397289276,
      "learning_rate": 0.00019049143983249168,
      "loss": 0.4199,
      "step": 391
    },
    {
      "epoch": 0.1447562776957164,
      "grad_norm": 0.2671319246292114,
      "learning_rate": 0.00019046680625692822,
      "loss": 0.3182,
      "step": 392
    },
    {
      "epoch": 0.1451255539143279,
      "grad_norm": 0.27982842922210693,
      "learning_rate": 0.0001904421726813647,
      "loss": 0.3592,
      "step": 393
    },
    {
      "epoch": 0.14549483013293943,
      "grad_norm": 0.26896971464157104,
      "learning_rate": 0.00019041753910580122,
      "loss": 0.4185,
      "step": 394
    },
    {
      "epoch": 0.14586410635155095,
      "grad_norm": 0.2974869906902313,
      "learning_rate": 0.0001903929055302377,
      "loss": 0.4006,
      "step": 395
    },
    {
      "epoch": 0.14623338257016247,
      "grad_norm": 0.3384716510772705,
      "learning_rate": 0.00019036827195467423,
      "loss": 0.3985,
      "step": 396
    },
    {
      "epoch": 0.14660265878877402,
      "grad_norm": 0.2817436456680298,
      "learning_rate": 0.00019034363837911074,
      "loss": 0.3608,
      "step": 397
    },
    {
      "epoch": 0.14697193500738553,
      "grad_norm": 0.271081805229187,
      "learning_rate": 0.00019031900480354725,
      "loss": 0.3625,
      "step": 398
    },
    {
      "epoch": 0.14734121122599705,
      "grad_norm": 0.27475908398628235,
      "learning_rate": 0.00019029437122798374,
      "loss": 0.3769,
      "step": 399
    },
    {
      "epoch": 0.14771048744460857,
      "grad_norm": 0.36818766593933105,
      "learning_rate": 0.00019026973765242026,
      "loss": 0.4717,
      "step": 400
    },
    {
      "epoch": 0.14771048744460857,
      "eval_loss": 0.36711856722831726,
      "eval_runtime": 5.861,
      "eval_samples_per_second": 8.531,
      "eval_steps_per_second": 1.194,
      "step": 400
    },
    {
      "epoch": 0.1480797636632201,
      "grad_norm": 0.33024027943611145,
      "learning_rate": 0.00019024510407685677,
      "loss": 0.4197,
      "step": 401
    },
    {
      "epoch": 0.1484490398818316,
      "grad_norm": 0.29586273431777954,
      "learning_rate": 0.00019022047050129329,
      "loss": 0.3627,
      "step": 402
    },
    {
      "epoch": 0.14881831610044313,
      "grad_norm": 0.2561482787132263,
      "learning_rate": 0.00019019583692572977,
      "loss": 0.3229,
      "step": 403
    },
    {
      "epoch": 0.14918759231905465,
      "grad_norm": 0.40970271825790405,
      "learning_rate": 0.0001901712033501663,
      "loss": 0.3999,
      "step": 404
    },
    {
      "epoch": 0.14955686853766617,
      "grad_norm": 0.2951110601425171,
      "learning_rate": 0.0001901465697746028,
      "loss": 0.3577,
      "step": 405
    },
    {
      "epoch": 0.1499261447562777,
      "grad_norm": 0.2874334156513214,
      "learning_rate": 0.00019012193619903932,
      "loss": 0.4147,
      "step": 406
    },
    {
      "epoch": 0.1502954209748892,
      "grad_norm": 0.3102007806301117,
      "learning_rate": 0.0001900973026234758,
      "loss": 0.4029,
      "step": 407
    },
    {
      "epoch": 0.15066469719350073,
      "grad_norm": 0.36349523067474365,
      "learning_rate": 0.00019007266904791232,
      "loss": 0.4179,
      "step": 408
    },
    {
      "epoch": 0.15103397341211225,
      "grad_norm": 0.38406822085380554,
      "learning_rate": 0.0001900480354723488,
      "loss": 0.4687,
      "step": 409
    },
    {
      "epoch": 0.15140324963072377,
      "grad_norm": 0.29971274733543396,
      "learning_rate": 0.00019002340189678535,
      "loss": 0.3451,
      "step": 410
    },
    {
      "epoch": 0.15177252584933532,
      "grad_norm": 0.29210084676742554,
      "learning_rate": 0.00018999876832122184,
      "loss": 0.3963,
      "step": 411
    },
    {
      "epoch": 0.15214180206794684,
      "grad_norm": 0.2798343300819397,
      "learning_rate": 0.00018997413474565835,
      "loss": 0.3325,
      "step": 412
    },
    {
      "epoch": 0.15251107828655835,
      "grad_norm": 0.3042639493942261,
      "learning_rate": 0.00018994950117009484,
      "loss": 0.4247,
      "step": 413
    },
    {
      "epoch": 0.15288035450516987,
      "grad_norm": 0.3588464558124542,
      "learning_rate": 0.00018992486759453135,
      "loss": 0.3864,
      "step": 414
    },
    {
      "epoch": 0.1532496307237814,
      "grad_norm": 0.2987573742866516,
      "learning_rate": 0.00018990023401896787,
      "loss": 0.3517,
      "step": 415
    },
    {
      "epoch": 0.1536189069423929,
      "grad_norm": 0.31477200984954834,
      "learning_rate": 0.00018987560044340438,
      "loss": 0.3267,
      "step": 416
    },
    {
      "epoch": 0.15398818316100443,
      "grad_norm": 0.2674895226955414,
      "learning_rate": 0.00018985096686784087,
      "loss": 0.3123,
      "step": 417
    },
    {
      "epoch": 0.15435745937961595,
      "grad_norm": 0.3107979893684387,
      "learning_rate": 0.00018982633329227738,
      "loss": 0.3532,
      "step": 418
    },
    {
      "epoch": 0.15472673559822747,
      "grad_norm": 0.35842299461364746,
      "learning_rate": 0.0001898016997167139,
      "loss": 0.3993,
      "step": 419
    },
    {
      "epoch": 0.155096011816839,
      "grad_norm": 0.31787213683128357,
      "learning_rate": 0.00018977706614115041,
      "loss": 0.4211,
      "step": 420
    },
    {
      "epoch": 0.1554652880354505,
      "grad_norm": 0.4663584232330322,
      "learning_rate": 0.0001897524325655869,
      "loss": 0.481,
      "step": 421
    },
    {
      "epoch": 0.15583456425406203,
      "grad_norm": 0.2762092351913452,
      "learning_rate": 0.00018972779899002342,
      "loss": 0.3008,
      "step": 422
    },
    {
      "epoch": 0.15620384047267355,
      "grad_norm": 0.35082077980041504,
      "learning_rate": 0.0001897031654144599,
      "loss": 0.4183,
      "step": 423
    },
    {
      "epoch": 0.15657311669128507,
      "grad_norm": 0.29008132219314575,
      "learning_rate": 0.00018967853183889645,
      "loss": 0.3859,
      "step": 424
    },
    {
      "epoch": 0.15694239290989662,
      "grad_norm": 0.32294392585754395,
      "learning_rate": 0.00018965389826333293,
      "loss": 0.3669,
      "step": 425
    },
    {
      "epoch": 0.15731166912850814,
      "grad_norm": 0.2959410846233368,
      "learning_rate": 0.00018962926468776945,
      "loss": 0.3342,
      "step": 426
    },
    {
      "epoch": 0.15768094534711966,
      "grad_norm": 0.3213634788990021,
      "learning_rate": 0.00018960463111220593,
      "loss": 0.3667,
      "step": 427
    },
    {
      "epoch": 0.15805022156573117,
      "grad_norm": 0.30615440011024475,
      "learning_rate": 0.00018957999753664245,
      "loss": 0.4233,
      "step": 428
    },
    {
      "epoch": 0.1584194977843427,
      "grad_norm": 0.33406612277030945,
      "learning_rate": 0.00018955536396107896,
      "loss": 0.5,
      "step": 429
    },
    {
      "epoch": 0.1587887740029542,
      "grad_norm": 0.347396582365036,
      "learning_rate": 0.00018953073038551548,
      "loss": 0.3849,
      "step": 430
    },
    {
      "epoch": 0.15915805022156573,
      "grad_norm": 0.3068416118621826,
      "learning_rate": 0.00018950609680995197,
      "loss": 0.4376,
      "step": 431
    },
    {
      "epoch": 0.15952732644017725,
      "grad_norm": 0.2605426609516144,
      "learning_rate": 0.00018948146323438848,
      "loss": 0.3734,
      "step": 432
    },
    {
      "epoch": 0.15989660265878877,
      "grad_norm": 0.3910651206970215,
      "learning_rate": 0.000189456829658825,
      "loss": 0.3859,
      "step": 433
    },
    {
      "epoch": 0.1602658788774003,
      "grad_norm": 0.3249022960662842,
      "learning_rate": 0.0001894321960832615,
      "loss": 0.4983,
      "step": 434
    },
    {
      "epoch": 0.1606351550960118,
      "grad_norm": 0.3365795910358429,
      "learning_rate": 0.000189407562507698,
      "loss": 0.3826,
      "step": 435
    },
    {
      "epoch": 0.16100443131462333,
      "grad_norm": 0.304880291223526,
      "learning_rate": 0.0001893829289321345,
      "loss": 0.4046,
      "step": 436
    },
    {
      "epoch": 0.16137370753323485,
      "grad_norm": 0.2914137840270996,
      "learning_rate": 0.00018935829535657103,
      "loss": 0.3765,
      "step": 437
    },
    {
      "epoch": 0.16174298375184637,
      "grad_norm": 0.26551946997642517,
      "learning_rate": 0.00018933366178100754,
      "loss": 0.3176,
      "step": 438
    },
    {
      "epoch": 0.1621122599704579,
      "grad_norm": 0.29021403193473816,
      "learning_rate": 0.00018930902820544403,
      "loss": 0.4308,
      "step": 439
    },
    {
      "epoch": 0.16248153618906944,
      "grad_norm": 0.32742637395858765,
      "learning_rate": 0.00018928439462988054,
      "loss": 0.3765,
      "step": 440
    },
    {
      "epoch": 0.16285081240768096,
      "grad_norm": 0.3414210081100464,
      "learning_rate": 0.00018925976105431703,
      "loss": 0.3878,
      "step": 441
    },
    {
      "epoch": 0.16322008862629248,
      "grad_norm": 0.3173421621322632,
      "learning_rate": 0.00018923512747875357,
      "loss": 0.4032,
      "step": 442
    },
    {
      "epoch": 0.163589364844904,
      "grad_norm": 0.3012334406375885,
      "learning_rate": 0.00018921049390319006,
      "loss": 0.4113,
      "step": 443
    },
    {
      "epoch": 0.16395864106351551,
      "grad_norm": 0.291298508644104,
      "learning_rate": 0.00018918586032762658,
      "loss": 0.3144,
      "step": 444
    },
    {
      "epoch": 0.16432791728212703,
      "grad_norm": 0.2866033911705017,
      "learning_rate": 0.00018916122675206306,
      "loss": 0.3517,
      "step": 445
    },
    {
      "epoch": 0.16469719350073855,
      "grad_norm": 0.28361326456069946,
      "learning_rate": 0.00018913659317649958,
      "loss": 0.3092,
      "step": 446
    },
    {
      "epoch": 0.16506646971935007,
      "grad_norm": 0.28165173530578613,
      "learning_rate": 0.0001891119596009361,
      "loss": 0.332,
      "step": 447
    },
    {
      "epoch": 0.1654357459379616,
      "grad_norm": 0.27297672629356384,
      "learning_rate": 0.0001890873260253726,
      "loss": 0.3231,
      "step": 448
    },
    {
      "epoch": 0.1658050221565731,
      "grad_norm": 0.3012530505657196,
      "learning_rate": 0.0001890626924498091,
      "loss": 0.4061,
      "step": 449
    },
    {
      "epoch": 0.16617429837518463,
      "grad_norm": 0.9882247447967529,
      "learning_rate": 0.0001890380588742456,
      "loss": 0.4416,
      "step": 450
    },
    {
      "epoch": 0.16617429837518463,
      "eval_loss": 0.3619551956653595,
      "eval_runtime": 5.8578,
      "eval_samples_per_second": 8.536,
      "eval_steps_per_second": 1.195,
      "step": 450
    },
    {
      "epoch": 0.16654357459379615,
      "grad_norm": 0.36340954899787903,
      "learning_rate": 0.00018901342529868212,
      "loss": 0.3902,
      "step": 451
    },
    {
      "epoch": 0.16691285081240767,
      "grad_norm": 0.32566314935684204,
      "learning_rate": 0.00018898879172311864,
      "loss": 0.3252,
      "step": 452
    },
    {
      "epoch": 0.1672821270310192,
      "grad_norm": 0.2897259593009949,
      "learning_rate": 0.00018896415814755513,
      "loss": 0.4222,
      "step": 453
    },
    {
      "epoch": 0.16765140324963074,
      "grad_norm": 0.40911665558815,
      "learning_rate": 0.00018893952457199164,
      "loss": 0.4465,
      "step": 454
    },
    {
      "epoch": 0.16802067946824226,
      "grad_norm": 0.3212021589279175,
      "learning_rate": 0.00018891489099642813,
      "loss": 0.3713,
      "step": 455
    },
    {
      "epoch": 0.16838995568685378,
      "grad_norm": 0.2914236783981323,
      "learning_rate": 0.00018889025742086467,
      "loss": 0.3846,
      "step": 456
    },
    {
      "epoch": 0.1687592319054653,
      "grad_norm": 0.42819809913635254,
      "learning_rate": 0.00018886562384530116,
      "loss": 0.4756,
      "step": 457
    },
    {
      "epoch": 0.16912850812407682,
      "grad_norm": 0.3311080038547516,
      "learning_rate": 0.00018884099026973767,
      "loss": 0.4781,
      "step": 458
    },
    {
      "epoch": 0.16949778434268833,
      "grad_norm": 0.3339468836784363,
      "learning_rate": 0.00018881635669417416,
      "loss": 0.4054,
      "step": 459
    },
    {
      "epoch": 0.16986706056129985,
      "grad_norm": 0.282219797372818,
      "learning_rate": 0.00018879172311861067,
      "loss": 0.3678,
      "step": 460
    },
    {
      "epoch": 0.17023633677991137,
      "grad_norm": 0.34083542227745056,
      "learning_rate": 0.0001887670895430472,
      "loss": 0.3657,
      "step": 461
    },
    {
      "epoch": 0.1706056129985229,
      "grad_norm": 0.28936949372291565,
      "learning_rate": 0.0001887424559674837,
      "loss": 0.3375,
      "step": 462
    },
    {
      "epoch": 0.1709748892171344,
      "grad_norm": 0.2606366276741028,
      "learning_rate": 0.0001887178223919202,
      "loss": 0.3536,
      "step": 463
    },
    {
      "epoch": 0.17134416543574593,
      "grad_norm": 0.3246243894100189,
      "learning_rate": 0.0001886931888163567,
      "loss": 0.4642,
      "step": 464
    },
    {
      "epoch": 0.17171344165435745,
      "grad_norm": 0.3147363066673279,
      "learning_rate": 0.00018866855524079322,
      "loss": 0.4156,
      "step": 465
    },
    {
      "epoch": 0.17208271787296897,
      "grad_norm": 0.3270387351512909,
      "learning_rate": 0.00018864392166522973,
      "loss": 0.4417,
      "step": 466
    },
    {
      "epoch": 0.1724519940915805,
      "grad_norm": 0.3732544183731079,
      "learning_rate": 0.00018861928808966622,
      "loss": 0.4895,
      "step": 467
    },
    {
      "epoch": 0.172821270310192,
      "grad_norm": 0.3213241398334503,
      "learning_rate": 0.00018859465451410274,
      "loss": 0.3832,
      "step": 468
    },
    {
      "epoch": 0.17319054652880356,
      "grad_norm": 0.28574463725090027,
      "learning_rate": 0.00018857002093853922,
      "loss": 0.3981,
      "step": 469
    },
    {
      "epoch": 0.17355982274741508,
      "grad_norm": 0.29374152421951294,
      "learning_rate": 0.00018854538736297574,
      "loss": 0.3864,
      "step": 470
    },
    {
      "epoch": 0.1739290989660266,
      "grad_norm": 0.23526206612586975,
      "learning_rate": 0.00018852075378741225,
      "loss": 0.2948,
      "step": 471
    },
    {
      "epoch": 0.17429837518463812,
      "grad_norm": 0.30339983105659485,
      "learning_rate": 0.00018849612021184874,
      "loss": 0.4726,
      "step": 472
    },
    {
      "epoch": 0.17466765140324964,
      "grad_norm": 0.3456306457519531,
      "learning_rate": 0.00018847148663628526,
      "loss": 0.3829,
      "step": 473
    },
    {
      "epoch": 0.17503692762186115,
      "grad_norm": 0.3155786097049713,
      "learning_rate": 0.00018844685306072177,
      "loss": 0.4021,
      "step": 474
    },
    {
      "epoch": 0.17540620384047267,
      "grad_norm": 0.25776541233062744,
      "learning_rate": 0.00018842221948515829,
      "loss": 0.3003,
      "step": 475
    },
    {
      "epoch": 0.1757754800590842,
      "grad_norm": 0.40696579217910767,
      "learning_rate": 0.00018839758590959477,
      "loss": 0.4147,
      "step": 476
    },
    {
      "epoch": 0.1761447562776957,
      "grad_norm": 0.3052532374858856,
      "learning_rate": 0.0001883729523340313,
      "loss": 0.3591,
      "step": 477
    },
    {
      "epoch": 0.17651403249630723,
      "grad_norm": 0.2917540371417999,
      "learning_rate": 0.0001883483187584678,
      "loss": 0.399,
      "step": 478
    },
    {
      "epoch": 0.17688330871491875,
      "grad_norm": 0.3572671711444855,
      "learning_rate": 0.00018832368518290432,
      "loss": 0.3823,
      "step": 479
    },
    {
      "epoch": 0.17725258493353027,
      "grad_norm": 0.27778226137161255,
      "learning_rate": 0.0001882990516073408,
      "loss": 0.3781,
      "step": 480
    },
    {
      "epoch": 0.1776218611521418,
      "grad_norm": 0.2719777524471283,
      "learning_rate": 0.00018827441803177732,
      "loss": 0.3575,
      "step": 481
    },
    {
      "epoch": 0.1779911373707533,
      "grad_norm": 0.3379530906677246,
      "learning_rate": 0.0001882497844562138,
      "loss": 0.45,
      "step": 482
    },
    {
      "epoch": 0.17836041358936486,
      "grad_norm": 0.36061891913414,
      "learning_rate": 0.00018822515088065035,
      "loss": 0.4076,
      "step": 483
    },
    {
      "epoch": 0.17872968980797638,
      "grad_norm": 0.3114835023880005,
      "learning_rate": 0.00018820051730508684,
      "loss": 0.3469,
      "step": 484
    },
    {
      "epoch": 0.1790989660265879,
      "grad_norm": 0.382068008184433,
      "learning_rate": 0.00018817588372952335,
      "loss": 0.4085,
      "step": 485
    },
    {
      "epoch": 0.17946824224519942,
      "grad_norm": 0.2963516116142273,
      "learning_rate": 0.00018815125015395984,
      "loss": 0.3647,
      "step": 486
    },
    {
      "epoch": 0.17983751846381094,
      "grad_norm": 0.2859930396080017,
      "learning_rate": 0.00018812661657839635,
      "loss": 0.3862,
      "step": 487
    },
    {
      "epoch": 0.18020679468242246,
      "grad_norm": 0.2853689193725586,
      "learning_rate": 0.00018810198300283287,
      "loss": 0.3282,
      "step": 488
    },
    {
      "epoch": 0.18057607090103397,
      "grad_norm": 0.2989000082015991,
      "learning_rate": 0.00018807734942726938,
      "loss": 0.3584,
      "step": 489
    },
    {
      "epoch": 0.1809453471196455,
      "grad_norm": 0.3115358054637909,
      "learning_rate": 0.00018805271585170587,
      "loss": 0.3382,
      "step": 490
    },
    {
      "epoch": 0.181314623338257,
      "grad_norm": 0.2946613132953644,
      "learning_rate": 0.00018802808227614238,
      "loss": 0.3533,
      "step": 491
    },
    {
      "epoch": 0.18168389955686853,
      "grad_norm": 0.3262624144554138,
      "learning_rate": 0.0001880034487005789,
      "loss": 0.3515,
      "step": 492
    },
    {
      "epoch": 0.18205317577548005,
      "grad_norm": 0.2636186480522156,
      "learning_rate": 0.0001879788151250154,
      "loss": 0.3616,
      "step": 493
    },
    {
      "epoch": 0.18242245199409157,
      "grad_norm": 0.4874628484249115,
      "learning_rate": 0.0001879541815494519,
      "loss": 0.3429,
      "step": 494
    },
    {
      "epoch": 0.1827917282127031,
      "grad_norm": 0.3091892600059509,
      "learning_rate": 0.00018792954797388842,
      "loss": 0.4433,
      "step": 495
    },
    {
      "epoch": 0.1831610044313146,
      "grad_norm": 0.31530311703681946,
      "learning_rate": 0.0001879049143983249,
      "loss": 0.3118,
      "step": 496
    },
    {
      "epoch": 0.18353028064992616,
      "grad_norm": 0.3015526533126831,
      "learning_rate": 0.00018788028082276144,
      "loss": 0.4021,
      "step": 497
    },
    {
      "epoch": 0.18389955686853768,
      "grad_norm": 0.2697685658931732,
      "learning_rate": 0.00018785564724719793,
      "loss": 0.3786,
      "step": 498
    },
    {
      "epoch": 0.1842688330871492,
      "grad_norm": 0.2839634120464325,
      "learning_rate": 0.00018783101367163445,
      "loss": 0.374,
      "step": 499
    },
    {
      "epoch": 0.18463810930576072,
      "grad_norm": 0.3035372793674469,
      "learning_rate": 0.00018780638009607093,
      "loss": 0.3843,
      "step": 500
    },
    {
      "epoch": 0.18463810930576072,
      "eval_loss": 0.35243338346481323,
      "eval_runtime": 5.8487,
      "eval_samples_per_second": 8.549,
      "eval_steps_per_second": 1.197,
      "step": 500
    },
    {
      "epoch": 0.18500738552437224,
      "grad_norm": 0.2590104937553406,
      "learning_rate": 0.00018778174652050745,
      "loss": 0.292,
      "step": 501
    },
    {
      "epoch": 0.18537666174298376,
      "grad_norm": 0.25871342420578003,
      "learning_rate": 0.00018775711294494396,
      "loss": 0.3161,
      "step": 502
    },
    {
      "epoch": 0.18574593796159528,
      "grad_norm": 0.326386958360672,
      "learning_rate": 0.00018773247936938048,
      "loss": 0.3638,
      "step": 503
    },
    {
      "epoch": 0.1861152141802068,
      "grad_norm": 0.33548593521118164,
      "learning_rate": 0.00018770784579381697,
      "loss": 0.3966,
      "step": 504
    },
    {
      "epoch": 0.18648449039881831,
      "grad_norm": 0.2861921489238739,
      "learning_rate": 0.00018768321221825348,
      "loss": 0.3616,
      "step": 505
    },
    {
      "epoch": 0.18685376661742983,
      "grad_norm": 0.29699164628982544,
      "learning_rate": 0.00018765857864269,
      "loss": 0.3878,
      "step": 506
    },
    {
      "epoch": 0.18722304283604135,
      "grad_norm": 0.31700825691223145,
      "learning_rate": 0.0001876339450671265,
      "loss": 0.3539,
      "step": 507
    },
    {
      "epoch": 0.18759231905465287,
      "grad_norm": 0.2864786982536316,
      "learning_rate": 0.000187609311491563,
      "loss": 0.3535,
      "step": 508
    },
    {
      "epoch": 0.1879615952732644,
      "grad_norm": 0.293557345867157,
      "learning_rate": 0.0001875846779159995,
      "loss": 0.3172,
      "step": 509
    },
    {
      "epoch": 0.1883308714918759,
      "grad_norm": 0.2888350784778595,
      "learning_rate": 0.00018756004434043603,
      "loss": 0.3934,
      "step": 510
    },
    {
      "epoch": 0.18870014771048743,
      "grad_norm": 0.29072830080986023,
      "learning_rate": 0.00018753541076487254,
      "loss": 0.3309,
      "step": 511
    },
    {
      "epoch": 0.18906942392909898,
      "grad_norm": 0.3262498378753662,
      "learning_rate": 0.00018751077718930903,
      "loss": 0.4339,
      "step": 512
    },
    {
      "epoch": 0.1894387001477105,
      "grad_norm": 0.3033256530761719,
      "learning_rate": 0.00018748614361374554,
      "loss": 0.4156,
      "step": 513
    },
    {
      "epoch": 0.18980797636632202,
      "grad_norm": 0.3099603056907654,
      "learning_rate": 0.00018746151003818203,
      "loss": 0.3781,
      "step": 514
    },
    {
      "epoch": 0.19017725258493354,
      "grad_norm": 0.31491467356681824,
      "learning_rate": 0.00018743687646261857,
      "loss": 0.4179,
      "step": 515
    },
    {
      "epoch": 0.19054652880354506,
      "grad_norm": 0.32624998688697815,
      "learning_rate": 0.00018741224288705506,
      "loss": 0.4148,
      "step": 516
    },
    {
      "epoch": 0.19091580502215658,
      "grad_norm": 0.318103551864624,
      "learning_rate": 0.00018738760931149157,
      "loss": 0.3575,
      "step": 517
    },
    {
      "epoch": 0.1912850812407681,
      "grad_norm": 0.24202318489551544,
      "learning_rate": 0.00018736297573592806,
      "loss": 0.2451,
      "step": 518
    },
    {
      "epoch": 0.19165435745937962,
      "grad_norm": 0.3605307340621948,
      "learning_rate": 0.00018733834216036458,
      "loss": 0.3476,
      "step": 519
    },
    {
      "epoch": 0.19202363367799113,
      "grad_norm": 0.32067054510116577,
      "learning_rate": 0.0001873137085848011,
      "loss": 0.3924,
      "step": 520
    },
    {
      "epoch": 0.19239290989660265,
      "grad_norm": 0.2573811709880829,
      "learning_rate": 0.0001872890750092376,
      "loss": 0.3305,
      "step": 521
    },
    {
      "epoch": 0.19276218611521417,
      "grad_norm": 0.278073251247406,
      "learning_rate": 0.0001872644414336741,
      "loss": 0.3298,
      "step": 522
    },
    {
      "epoch": 0.1931314623338257,
      "grad_norm": 0.3071305453777313,
      "learning_rate": 0.0001872398078581106,
      "loss": 0.4292,
      "step": 523
    },
    {
      "epoch": 0.1935007385524372,
      "grad_norm": 0.2722351849079132,
      "learning_rate": 0.00018721517428254712,
      "loss": 0.3652,
      "step": 524
    },
    {
      "epoch": 0.19387001477104873,
      "grad_norm": 0.318668931722641,
      "learning_rate": 0.00018719054070698364,
      "loss": 0.3599,
      "step": 525
    },
    {
      "epoch": 0.19423929098966028,
      "grad_norm": 0.2947141230106354,
      "learning_rate": 0.00018716590713142013,
      "loss": 0.3261,
      "step": 526
    },
    {
      "epoch": 0.1946085672082718,
      "grad_norm": 0.31848984956741333,
      "learning_rate": 0.00018714127355585664,
      "loss": 0.3581,
      "step": 527
    },
    {
      "epoch": 0.19497784342688332,
      "grad_norm": 0.32939398288726807,
      "learning_rate": 0.00018711663998029313,
      "loss": 0.3677,
      "step": 528
    },
    {
      "epoch": 0.19534711964549484,
      "grad_norm": 0.2825155258178711,
      "learning_rate": 0.00018709200640472967,
      "loss": 0.3372,
      "step": 529
    },
    {
      "epoch": 0.19571639586410636,
      "grad_norm": 0.291613906621933,
      "learning_rate": 0.00018706737282916616,
      "loss": 0.3619,
      "step": 530
    },
    {
      "epoch": 0.19608567208271788,
      "grad_norm": 0.26579317450523376,
      "learning_rate": 0.00018704273925360267,
      "loss": 0.2948,
      "step": 531
    },
    {
      "epoch": 0.1964549483013294,
      "grad_norm": 0.2836756408214569,
      "learning_rate": 0.00018701810567803916,
      "loss": 0.3493,
      "step": 532
    },
    {
      "epoch": 0.19682422451994092,
      "grad_norm": 0.33159008622169495,
      "learning_rate": 0.00018699347210247567,
      "loss": 0.4069,
      "step": 533
    },
    {
      "epoch": 0.19719350073855244,
      "grad_norm": 0.26926809549331665,
      "learning_rate": 0.0001869688385269122,
      "loss": 0.3317,
      "step": 534
    },
    {
      "epoch": 0.19756277695716395,
      "grad_norm": 0.3338308334350586,
      "learning_rate": 0.0001869442049513487,
      "loss": 0.4025,
      "step": 535
    },
    {
      "epoch": 0.19793205317577547,
      "grad_norm": 0.3891758322715759,
      "learning_rate": 0.0001869195713757852,
      "loss": 0.4243,
      "step": 536
    },
    {
      "epoch": 0.198301329394387,
      "grad_norm": 0.2698891758918762,
      "learning_rate": 0.0001868949378002217,
      "loss": 0.2992,
      "step": 537
    },
    {
      "epoch": 0.1986706056129985,
      "grad_norm": 0.23717094957828522,
      "learning_rate": 0.00018687030422465822,
      "loss": 0.2854,
      "step": 538
    },
    {
      "epoch": 0.19903988183161003,
      "grad_norm": 0.3537690341472626,
      "learning_rate": 0.00018684567064909473,
      "loss": 0.435,
      "step": 539
    },
    {
      "epoch": 0.19940915805022155,
      "grad_norm": 0.32419681549072266,
      "learning_rate": 0.00018682103707353122,
      "loss": 0.323,
      "step": 540
    },
    {
      "epoch": 0.1997784342688331,
      "grad_norm": 0.31805306673049927,
      "learning_rate": 0.00018679640349796774,
      "loss": 0.4009,
      "step": 541
    },
    {
      "epoch": 0.20014771048744462,
      "grad_norm": 0.2685754895210266,
      "learning_rate": 0.00018677176992240425,
      "loss": 0.3502,
      "step": 542
    },
    {
      "epoch": 0.20051698670605614,
      "grad_norm": 0.2632409930229187,
      "learning_rate": 0.00018674713634684077,
      "loss": 0.4185,
      "step": 543
    },
    {
      "epoch": 0.20088626292466766,
      "grad_norm": 0.3476162552833557,
      "learning_rate": 0.00018672250277127725,
      "loss": 0.4793,
      "step": 544
    },
    {
      "epoch": 0.20125553914327918,
      "grad_norm": 0.3115142583847046,
      "learning_rate": 0.00018669786919571377,
      "loss": 0.4598,
      "step": 545
    },
    {
      "epoch": 0.2016248153618907,
      "grad_norm": 0.30133846402168274,
      "learning_rate": 0.00018667323562015026,
      "loss": 0.4154,
      "step": 546
    },
    {
      "epoch": 0.20199409158050222,
      "grad_norm": 0.2999810576438904,
      "learning_rate": 0.0001866486020445868,
      "loss": 0.338,
      "step": 547
    },
    {
      "epoch": 0.20236336779911374,
      "grad_norm": 0.3162882328033447,
      "learning_rate": 0.00018662396846902328,
      "loss": 0.3049,
      "step": 548
    },
    {
      "epoch": 0.20273264401772526,
      "grad_norm": 0.2815430760383606,
      "learning_rate": 0.0001865993348934598,
      "loss": 0.372,
      "step": 549
    },
    {
      "epoch": 0.20310192023633677,
      "grad_norm": 0.2795547544956207,
      "learning_rate": 0.0001865747013178963,
      "loss": 0.3706,
      "step": 550
    },
    {
      "epoch": 0.20310192023633677,
      "eval_loss": 0.350197970867157,
      "eval_runtime": 5.8621,
      "eval_samples_per_second": 8.529,
      "eval_steps_per_second": 1.194,
      "step": 550
    },
    {
      "epoch": 0.2034711964549483,
      "grad_norm": 0.39499425888061523,
      "learning_rate": 0.0001865500677423328,
      "loss": 0.442,
      "step": 551
    },
    {
      "epoch": 0.2038404726735598,
      "grad_norm": 0.32192522287368774,
      "learning_rate": 0.00018652543416676932,
      "loss": 0.4273,
      "step": 552
    },
    {
      "epoch": 0.20420974889217133,
      "grad_norm": 0.3077501654624939,
      "learning_rate": 0.00018650080059120583,
      "loss": 0.3648,
      "step": 553
    },
    {
      "epoch": 0.20457902511078285,
      "grad_norm": 0.24269762635231018,
      "learning_rate": 0.00018647616701564232,
      "loss": 0.3095,
      "step": 554
    },
    {
      "epoch": 0.2049483013293944,
      "grad_norm": 0.26936954259872437,
      "learning_rate": 0.00018645153344007883,
      "loss": 0.3352,
      "step": 555
    },
    {
      "epoch": 0.20531757754800592,
      "grad_norm": 0.2520939111709595,
      "learning_rate": 0.00018642689986451535,
      "loss": 0.3219,
      "step": 556
    },
    {
      "epoch": 0.20568685376661744,
      "grad_norm": 0.35456258058547974,
      "learning_rate": 0.00018640226628895186,
      "loss": 0.337,
      "step": 557
    },
    {
      "epoch": 0.20605612998522896,
      "grad_norm": 0.27598705887794495,
      "learning_rate": 0.00018637763271338835,
      "loss": 0.3331,
      "step": 558
    },
    {
      "epoch": 0.20642540620384048,
      "grad_norm": 0.3117420971393585,
      "learning_rate": 0.00018635299913782486,
      "loss": 0.4273,
      "step": 559
    },
    {
      "epoch": 0.206794682422452,
      "grad_norm": 0.3151145875453949,
      "learning_rate": 0.00018632836556226135,
      "loss": 0.3964,
      "step": 560
    },
    {
      "epoch": 0.20716395864106352,
      "grad_norm": 0.3065900206565857,
      "learning_rate": 0.0001863037319866979,
      "loss": 0.3189,
      "step": 561
    },
    {
      "epoch": 0.20753323485967504,
      "grad_norm": 0.23873600363731384,
      "learning_rate": 0.00018627909841113438,
      "loss": 0.3091,
      "step": 562
    },
    {
      "epoch": 0.20790251107828656,
      "grad_norm": 0.30931881070137024,
      "learning_rate": 0.0001862544648355709,
      "loss": 0.4215,
      "step": 563
    },
    {
      "epoch": 0.20827178729689808,
      "grad_norm": 0.328284353017807,
      "learning_rate": 0.00018622983126000738,
      "loss": 0.3981,
      "step": 564
    },
    {
      "epoch": 0.2086410635155096,
      "grad_norm": 0.3178199231624603,
      "learning_rate": 0.0001862051976844439,
      "loss": 0.4749,
      "step": 565
    },
    {
      "epoch": 0.20901033973412111,
      "grad_norm": 0.3410341441631317,
      "learning_rate": 0.0001861805641088804,
      "loss": 0.3619,
      "step": 566
    },
    {
      "epoch": 0.20937961595273263,
      "grad_norm": 0.28396570682525635,
      "learning_rate": 0.00018615593053331693,
      "loss": 0.3469,
      "step": 567
    },
    {
      "epoch": 0.20974889217134415,
      "grad_norm": 0.24647068977355957,
      "learning_rate": 0.00018613129695775341,
      "loss": 0.2867,
      "step": 568
    },
    {
      "epoch": 0.2101181683899557,
      "grad_norm": 0.3200088441371918,
      "learning_rate": 0.00018610666338218993,
      "loss": 0.3137,
      "step": 569
    },
    {
      "epoch": 0.21048744460856722,
      "grad_norm": 0.31181222200393677,
      "learning_rate": 0.00018608202980662644,
      "loss": 0.4194,
      "step": 570
    },
    {
      "epoch": 0.21085672082717874,
      "grad_norm": 0.2677057385444641,
      "learning_rate": 0.00018605739623106296,
      "loss": 0.3286,
      "step": 571
    },
    {
      "epoch": 0.21122599704579026,
      "grad_norm": 0.3450012803077698,
      "learning_rate": 0.00018603276265549945,
      "loss": 0.3933,
      "step": 572
    },
    {
      "epoch": 0.21159527326440178,
      "grad_norm": 0.2596141993999481,
      "learning_rate": 0.00018600812907993596,
      "loss": 0.3692,
      "step": 573
    },
    {
      "epoch": 0.2119645494830133,
      "grad_norm": 0.2887343466281891,
      "learning_rate": 0.00018598349550437248,
      "loss": 0.3645,
      "step": 574
    },
    {
      "epoch": 0.21233382570162482,
      "grad_norm": 0.2597286105155945,
      "learning_rate": 0.000185958861928809,
      "loss": 0.3141,
      "step": 575
    },
    {
      "epoch": 0.21270310192023634,
      "grad_norm": 0.27257657051086426,
      "learning_rate": 0.00018593422835324548,
      "loss": 0.298,
      "step": 576
    },
    {
      "epoch": 0.21307237813884786,
      "grad_norm": 0.26083192229270935,
      "learning_rate": 0.000185909594777682,
      "loss": 0.3124,
      "step": 577
    },
    {
      "epoch": 0.21344165435745938,
      "grad_norm": 0.2763814926147461,
      "learning_rate": 0.00018588496120211848,
      "loss": 0.3169,
      "step": 578
    },
    {
      "epoch": 0.2138109305760709,
      "grad_norm": 0.37325096130371094,
      "learning_rate": 0.00018586032762655502,
      "loss": 0.4646,
      "step": 579
    },
    {
      "epoch": 0.21418020679468242,
      "grad_norm": 0.26927992701530457,
      "learning_rate": 0.0001858356940509915,
      "loss": 0.3276,
      "step": 580
    },
    {
      "epoch": 0.21454948301329393,
      "grad_norm": 0.29396751523017883,
      "learning_rate": 0.00018581106047542802,
      "loss": 0.2939,
      "step": 581
    },
    {
      "epoch": 0.21491875923190545,
      "grad_norm": 0.2815605700016022,
      "learning_rate": 0.0001857864268998645,
      "loss": 0.4065,
      "step": 582
    },
    {
      "epoch": 0.21528803545051697,
      "grad_norm": 0.6534555554389954,
      "learning_rate": 0.00018576179332430103,
      "loss": 0.4855,
      "step": 583
    },
    {
      "epoch": 0.21565731166912852,
      "grad_norm": 0.27794015407562256,
      "learning_rate": 0.00018573715974873754,
      "loss": 0.3871,
      "step": 584
    },
    {
      "epoch": 0.21602658788774004,
      "grad_norm": 0.3053019940853119,
      "learning_rate": 0.00018571252617317406,
      "loss": 0.3112,
      "step": 585
    },
    {
      "epoch": 0.21639586410635156,
      "grad_norm": 0.28791019320487976,
      "learning_rate": 0.00018568789259761054,
      "loss": 0.3504,
      "step": 586
    },
    {
      "epoch": 0.21676514032496308,
      "grad_norm": 0.32081103324890137,
      "learning_rate": 0.00018566325902204706,
      "loss": 0.3937,
      "step": 587
    },
    {
      "epoch": 0.2171344165435746,
      "grad_norm": 0.3388102352619171,
      "learning_rate": 0.00018563862544648357,
      "loss": 0.4336,
      "step": 588
    },
    {
      "epoch": 0.21750369276218612,
      "grad_norm": 0.24963918328285217,
      "learning_rate": 0.0001856139918709201,
      "loss": 0.3251,
      "step": 589
    },
    {
      "epoch": 0.21787296898079764,
      "grad_norm": 0.2805040180683136,
      "learning_rate": 0.00018558935829535657,
      "loss": 0.2674,
      "step": 590
    },
    {
      "epoch": 0.21824224519940916,
      "grad_norm": 0.26187995076179504,
      "learning_rate": 0.0001855647247197931,
      "loss": 0.3648,
      "step": 591
    },
    {
      "epoch": 0.21861152141802068,
      "grad_norm": 0.25983595848083496,
      "learning_rate": 0.00018554009114422958,
      "loss": 0.3169,
      "step": 592
    },
    {
      "epoch": 0.2189807976366322,
      "grad_norm": 0.3068999946117401,
      "learning_rate": 0.00018551545756866612,
      "loss": 0.3912,
      "step": 593
    },
    {
      "epoch": 0.21935007385524372,
      "grad_norm": 0.2944723963737488,
      "learning_rate": 0.0001854908239931026,
      "loss": 0.3702,
      "step": 594
    },
    {
      "epoch": 0.21971935007385524,
      "grad_norm": 0.29646021127700806,
      "learning_rate": 0.00018546619041753912,
      "loss": 0.4046,
      "step": 595
    },
    {
      "epoch": 0.22008862629246675,
      "grad_norm": 0.3013732135295868,
      "learning_rate": 0.0001854415568419756,
      "loss": 0.389,
      "step": 596
    },
    {
      "epoch": 0.22045790251107827,
      "grad_norm": 0.33398228883743286,
      "learning_rate": 0.00018541692326641212,
      "loss": 0.382,
      "step": 597
    },
    {
      "epoch": 0.22082717872968982,
      "grad_norm": 0.27201974391937256,
      "learning_rate": 0.00018539228969084864,
      "loss": 0.3684,
      "step": 598
    },
    {
      "epoch": 0.22119645494830134,
      "grad_norm": 0.29355520009994507,
      "learning_rate": 0.00018536765611528515,
      "loss": 0.3608,
      "step": 599
    },
    {
      "epoch": 0.22156573116691286,
      "grad_norm": 0.30281344056129456,
      "learning_rate": 0.00018534302253972164,
      "loss": 0.3562,
      "step": 600
    },
    {
      "epoch": 0.22156573116691286,
      "eval_loss": 0.34722042083740234,
      "eval_runtime": 5.8648,
      "eval_samples_per_second": 8.525,
      "eval_steps_per_second": 1.194,
      "step": 600
    },
    {
      "epoch": 0.22193500738552438,
      "grad_norm": 0.27795374393463135,
      "learning_rate": 0.00018531838896415815,
      "loss": 0.3903,
      "step": 601
    },
    {
      "epoch": 0.2223042836041359,
      "grad_norm": 0.47363731265068054,
      "learning_rate": 0.00018529375538859467,
      "loss": 0.4548,
      "step": 602
    },
    {
      "epoch": 0.22267355982274742,
      "grad_norm": 0.3682205080986023,
      "learning_rate": 0.00018526912181303118,
      "loss": 0.451,
      "step": 603
    },
    {
      "epoch": 0.22304283604135894,
      "grad_norm": 0.35432472825050354,
      "learning_rate": 0.00018524448823746767,
      "loss": 0.3183,
      "step": 604
    },
    {
      "epoch": 0.22341211225997046,
      "grad_norm": 0.3637113571166992,
      "learning_rate": 0.00018521985466190419,
      "loss": 0.4657,
      "step": 605
    },
    {
      "epoch": 0.22378138847858198,
      "grad_norm": 0.32639309763908386,
      "learning_rate": 0.0001851952210863407,
      "loss": 0.4005,
      "step": 606
    },
    {
      "epoch": 0.2241506646971935,
      "grad_norm": 0.3073771595954895,
      "learning_rate": 0.00018517058751077721,
      "loss": 0.3629,
      "step": 607
    },
    {
      "epoch": 0.22451994091580502,
      "grad_norm": 0.26197266578674316,
      "learning_rate": 0.0001851459539352137,
      "loss": 0.3401,
      "step": 608
    },
    {
      "epoch": 0.22488921713441654,
      "grad_norm": 0.2626654803752899,
      "learning_rate": 0.00018512132035965022,
      "loss": 0.3404,
      "step": 609
    },
    {
      "epoch": 0.22525849335302806,
      "grad_norm": 0.3299645781517029,
      "learning_rate": 0.0001850966867840867,
      "loss": 0.3654,
      "step": 610
    },
    {
      "epoch": 0.22562776957163957,
      "grad_norm": 0.3153376877307892,
      "learning_rate": 0.00018507205320852325,
      "loss": 0.3739,
      "step": 611
    },
    {
      "epoch": 0.2259970457902511,
      "grad_norm": 0.25192391872406006,
      "learning_rate": 0.00018504741963295973,
      "loss": 0.3939,
      "step": 612
    },
    {
      "epoch": 0.22636632200886264,
      "grad_norm": 0.3275706470012665,
      "learning_rate": 0.00018502278605739625,
      "loss": 0.4047,
      "step": 613
    },
    {
      "epoch": 0.22673559822747416,
      "grad_norm": 0.2731536030769348,
      "learning_rate": 0.00018499815248183274,
      "loss": 0.3351,
      "step": 614
    },
    {
      "epoch": 0.22710487444608568,
      "grad_norm": 0.2870076298713684,
      "learning_rate": 0.00018497351890626925,
      "loss": 0.3622,
      "step": 615
    },
    {
      "epoch": 0.2274741506646972,
      "grad_norm": 0.3062557280063629,
      "learning_rate": 0.00018494888533070577,
      "loss": 0.3861,
      "step": 616
    },
    {
      "epoch": 0.22784342688330872,
      "grad_norm": 0.3006839454174042,
      "learning_rate": 0.00018492425175514228,
      "loss": 0.4001,
      "step": 617
    },
    {
      "epoch": 0.22821270310192024,
      "grad_norm": 0.3576716482639313,
      "learning_rate": 0.00018489961817957877,
      "loss": 0.3705,
      "step": 618
    },
    {
      "epoch": 0.22858197932053176,
      "grad_norm": 0.29118189215660095,
      "learning_rate": 0.00018487498460401528,
      "loss": 0.3285,
      "step": 619
    },
    {
      "epoch": 0.22895125553914328,
      "grad_norm": 0.3169896900653839,
      "learning_rate": 0.0001848503510284518,
      "loss": 0.3153,
      "step": 620
    },
    {
      "epoch": 0.2293205317577548,
      "grad_norm": 0.3830879032611847,
      "learning_rate": 0.0001848257174528883,
      "loss": 0.4512,
      "step": 621
    },
    {
      "epoch": 0.22968980797636632,
      "grad_norm": 0.3761618435382843,
      "learning_rate": 0.0001848010838773248,
      "loss": 0.397,
      "step": 622
    },
    {
      "epoch": 0.23005908419497784,
      "grad_norm": 0.3072148859500885,
      "learning_rate": 0.0001847764503017613,
      "loss": 0.3602,
      "step": 623
    },
    {
      "epoch": 0.23042836041358936,
      "grad_norm": 0.2576681971549988,
      "learning_rate": 0.0001847518167261978,
      "loss": 0.3387,
      "step": 624
    },
    {
      "epoch": 0.23079763663220088,
      "grad_norm": 0.2789335250854492,
      "learning_rate": 0.00018472718315063434,
      "loss": 0.3678,
      "step": 625
    },
    {
      "epoch": 0.2311669128508124,
      "grad_norm": 0.26227229833602905,
      "learning_rate": 0.00018470254957507083,
      "loss": 0.281,
      "step": 626
    },
    {
      "epoch": 0.23153618906942394,
      "grad_norm": 0.2812472879886627,
      "learning_rate": 0.00018467791599950734,
      "loss": 0.3269,
      "step": 627
    },
    {
      "epoch": 0.23190546528803546,
      "grad_norm": 0.24520502984523773,
      "learning_rate": 0.00018465328242394383,
      "loss": 0.2865,
      "step": 628
    },
    {
      "epoch": 0.23227474150664698,
      "grad_norm": 0.38156548142433167,
      "learning_rate": 0.00018462864884838035,
      "loss": 0.4165,
      "step": 629
    },
    {
      "epoch": 0.2326440177252585,
      "grad_norm": 0.3365303575992584,
      "learning_rate": 0.00018460401527281686,
      "loss": 0.4914,
      "step": 630
    },
    {
      "epoch": 0.23301329394387002,
      "grad_norm": 0.3444092869758606,
      "learning_rate": 0.00018457938169725338,
      "loss": 0.4132,
      "step": 631
    },
    {
      "epoch": 0.23338257016248154,
      "grad_norm": 0.2728528678417206,
      "learning_rate": 0.00018455474812168986,
      "loss": 0.3059,
      "step": 632
    },
    {
      "epoch": 0.23375184638109306,
      "grad_norm": 0.2688409686088562,
      "learning_rate": 0.00018453011454612638,
      "loss": 0.3817,
      "step": 633
    },
    {
      "epoch": 0.23412112259970458,
      "grad_norm": 0.32839080691337585,
      "learning_rate": 0.0001845054809705629,
      "loss": 0.4481,
      "step": 634
    },
    {
      "epoch": 0.2344903988183161,
      "grad_norm": 0.29927435517311096,
      "learning_rate": 0.0001844808473949994,
      "loss": 0.364,
      "step": 635
    },
    {
      "epoch": 0.23485967503692762,
      "grad_norm": 0.3030961751937866,
      "learning_rate": 0.0001844562138194359,
      "loss": 0.3521,
      "step": 636
    },
    {
      "epoch": 0.23522895125553914,
      "grad_norm": 0.30552420020103455,
      "learning_rate": 0.0001844315802438724,
      "loss": 0.3481,
      "step": 637
    },
    {
      "epoch": 0.23559822747415066,
      "grad_norm": 0.2642765939235687,
      "learning_rate": 0.0001844069466683089,
      "loss": 0.3218,
      "step": 638
    },
    {
      "epoch": 0.23596750369276218,
      "grad_norm": 0.3970206677913666,
      "learning_rate": 0.00018438231309274544,
      "loss": 0.4712,
      "step": 639
    },
    {
      "epoch": 0.2363367799113737,
      "grad_norm": 0.28402864933013916,
      "learning_rate": 0.00018435767951718193,
      "loss": 0.3304,
      "step": 640
    },
    {
      "epoch": 0.23670605612998524,
      "grad_norm": 0.3056892454624176,
      "learning_rate": 0.00018433304594161844,
      "loss": 0.3698,
      "step": 641
    },
    {
      "epoch": 0.23707533234859676,
      "grad_norm": 0.29281875491142273,
      "learning_rate": 0.00018430841236605493,
      "loss": 0.3682,
      "step": 642
    },
    {
      "epoch": 0.23744460856720828,
      "grad_norm": 0.3088505268096924,
      "learning_rate": 0.00018428377879049144,
      "loss": 0.3412,
      "step": 643
    },
    {
      "epoch": 0.2378138847858198,
      "grad_norm": 0.3349262475967407,
      "learning_rate": 0.00018425914521492796,
      "loss": 0.3773,
      "step": 644
    },
    {
      "epoch": 0.23818316100443132,
      "grad_norm": 0.2669546902179718,
      "learning_rate": 0.00018423451163936447,
      "loss": 0.3013,
      "step": 645
    },
    {
      "epoch": 0.23855243722304284,
      "grad_norm": 0.27441346645355225,
      "learning_rate": 0.00018420987806380096,
      "loss": 0.3547,
      "step": 646
    },
    {
      "epoch": 0.23892171344165436,
      "grad_norm": 0.37875619530677795,
      "learning_rate": 0.00018418524448823748,
      "loss": 0.4332,
      "step": 647
    },
    {
      "epoch": 0.23929098966026588,
      "grad_norm": 0.35620611906051636,
      "learning_rate": 0.000184160610912674,
      "loss": 0.3944,
      "step": 648
    },
    {
      "epoch": 0.2396602658788774,
      "grad_norm": 0.45027297735214233,
      "learning_rate": 0.0001841359773371105,
      "loss": 0.317,
      "step": 649
    },
    {
      "epoch": 0.24002954209748892,
      "grad_norm": 0.23810823261737823,
      "learning_rate": 0.000184111343761547,
      "loss": 0.2787,
      "step": 650
    },
    {
      "epoch": 0.24002954209748892,
      "eval_loss": 0.35080447793006897,
      "eval_runtime": 5.8683,
      "eval_samples_per_second": 8.52,
      "eval_steps_per_second": 1.193,
      "step": 650
    },
    {
      "epoch": 0.24039881831610044,
      "grad_norm": 0.31773853302001953,
      "learning_rate": 0.0001840867101859835,
      "loss": 0.2863,
      "step": 651
    },
    {
      "epoch": 0.24076809453471196,
      "grad_norm": 0.273213654756546,
      "learning_rate": 0.00018406207661042002,
      "loss": 0.3398,
      "step": 652
    },
    {
      "epoch": 0.24113737075332348,
      "grad_norm": 0.31761434674263,
      "learning_rate": 0.00018403744303485654,
      "loss": 0.3654,
      "step": 653
    },
    {
      "epoch": 0.241506646971935,
      "grad_norm": 0.309353232383728,
      "learning_rate": 0.00018401280945929302,
      "loss": 0.4567,
      "step": 654
    },
    {
      "epoch": 0.24187592319054652,
      "grad_norm": 0.2656030058860779,
      "learning_rate": 0.00018398817588372954,
      "loss": 0.3315,
      "step": 655
    },
    {
      "epoch": 0.24224519940915806,
      "grad_norm": 0.27817684412002563,
      "learning_rate": 0.00018396354230816603,
      "loss": 0.3224,
      "step": 656
    },
    {
      "epoch": 0.24261447562776958,
      "grad_norm": 0.26796382665634155,
      "learning_rate": 0.00018393890873260257,
      "loss": 0.3259,
      "step": 657
    },
    {
      "epoch": 0.2429837518463811,
      "grad_norm": 0.3298114538192749,
      "learning_rate": 0.00018391427515703905,
      "loss": 0.4933,
      "step": 658
    },
    {
      "epoch": 0.24335302806499262,
      "grad_norm": 0.29278478026390076,
      "learning_rate": 0.00018388964158147557,
      "loss": 0.379,
      "step": 659
    },
    {
      "epoch": 0.24372230428360414,
      "grad_norm": 0.3018966317176819,
      "learning_rate": 0.00018386500800591206,
      "loss": 0.3204,
      "step": 660
    },
    {
      "epoch": 0.24409158050221566,
      "grad_norm": 0.26778993010520935,
      "learning_rate": 0.00018384037443034857,
      "loss": 0.3322,
      "step": 661
    },
    {
      "epoch": 0.24446085672082718,
      "grad_norm": 0.4353688061237335,
      "learning_rate": 0.00018381574085478509,
      "loss": 0.4762,
      "step": 662
    },
    {
      "epoch": 0.2448301329394387,
      "grad_norm": 0.3074433505535126,
      "learning_rate": 0.0001837911072792216,
      "loss": 0.4012,
      "step": 663
    },
    {
      "epoch": 0.24519940915805022,
      "grad_norm": 0.29345113039016724,
      "learning_rate": 0.0001837664737036581,
      "loss": 0.3222,
      "step": 664
    },
    {
      "epoch": 0.24556868537666174,
      "grad_norm": 0.3226329982280731,
      "learning_rate": 0.0001837418401280946,
      "loss": 0.4165,
      "step": 665
    },
    {
      "epoch": 0.24593796159527326,
      "grad_norm": 0.28262099623680115,
      "learning_rate": 0.00018371720655253112,
      "loss": 0.3265,
      "step": 666
    },
    {
      "epoch": 0.24630723781388478,
      "grad_norm": 0.3262132704257965,
      "learning_rate": 0.00018369257297696763,
      "loss": 0.3889,
      "step": 667
    },
    {
      "epoch": 0.2466765140324963,
      "grad_norm": 0.26038438081741333,
      "learning_rate": 0.00018366793940140412,
      "loss": 0.3204,
      "step": 668
    },
    {
      "epoch": 0.24704579025110782,
      "grad_norm": 0.4496694803237915,
      "learning_rate": 0.00018364330582584063,
      "loss": 0.2907,
      "step": 669
    },
    {
      "epoch": 0.24741506646971936,
      "grad_norm": 0.2622362971305847,
      "learning_rate": 0.00018361867225027712,
      "loss": 0.3416,
      "step": 670
    },
    {
      "epoch": 0.24778434268833088,
      "grad_norm": 0.27988767623901367,
      "learning_rate": 0.00018359403867471366,
      "loss": 0.3254,
      "step": 671
    },
    {
      "epoch": 0.2481536189069424,
      "grad_norm": 0.23468215763568878,
      "learning_rate": 0.00018356940509915015,
      "loss": 0.3083,
      "step": 672
    },
    {
      "epoch": 0.24852289512555392,
      "grad_norm": 0.2847580909729004,
      "learning_rate": 0.00018354477152358667,
      "loss": 0.3204,
      "step": 673
    },
    {
      "epoch": 0.24889217134416544,
      "grad_norm": 0.2709912657737732,
      "learning_rate": 0.00018352013794802315,
      "loss": 0.3105,
      "step": 674
    },
    {
      "epoch": 0.24926144756277696,
      "grad_norm": 0.2994575798511505,
      "learning_rate": 0.00018349550437245967,
      "loss": 0.3747,
      "step": 675
    },
    {
      "epoch": 0.24963072378138848,
      "grad_norm": 0.2810092270374298,
      "learning_rate": 0.00018347087079689618,
      "loss": 0.3068,
      "step": 676
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.362888902425766,
      "learning_rate": 0.0001834462372213327,
      "loss": 0.3894,
      "step": 677
    },
    {
      "epoch": 0.25036927621861155,
      "grad_norm": 0.26018789410591125,
      "learning_rate": 0.00018342160364576918,
      "loss": 0.3359,
      "step": 678
    },
    {
      "epoch": 0.25073855243722304,
      "grad_norm": 0.28938496112823486,
      "learning_rate": 0.0001833969700702057,
      "loss": 0.3727,
      "step": 679
    },
    {
      "epoch": 0.2511078286558346,
      "grad_norm": 0.4264602065086365,
      "learning_rate": 0.00018337233649464221,
      "loss": 0.4565,
      "step": 680
    },
    {
      "epoch": 0.2514771048744461,
      "grad_norm": 0.3633279800415039,
      "learning_rate": 0.00018334770291907873,
      "loss": 0.3408,
      "step": 681
    },
    {
      "epoch": 0.2518463810930576,
      "grad_norm": 0.3045649528503418,
      "learning_rate": 0.00018332306934351522,
      "loss": 0.3905,
      "step": 682
    },
    {
      "epoch": 0.2522156573116691,
      "grad_norm": 0.3043745756149292,
      "learning_rate": 0.00018329843576795173,
      "loss": 0.4355,
      "step": 683
    },
    {
      "epoch": 0.25258493353028066,
      "grad_norm": 0.2773546874523163,
      "learning_rate": 0.00018327380219238825,
      "loss": 0.3287,
      "step": 684
    },
    {
      "epoch": 0.25295420974889216,
      "grad_norm": 0.4251217842102051,
      "learning_rate": 0.00018324916861682476,
      "loss": 0.4987,
      "step": 685
    },
    {
      "epoch": 0.2533234859675037,
      "grad_norm": 0.25384971499443054,
      "learning_rate": 0.00018322453504126125,
      "loss": 0.3379,
      "step": 686
    },
    {
      "epoch": 0.2536927621861152,
      "grad_norm": 0.3329382538795471,
      "learning_rate": 0.00018319990146569776,
      "loss": 0.3969,
      "step": 687
    },
    {
      "epoch": 0.25406203840472674,
      "grad_norm": 0.30880457162857056,
      "learning_rate": 0.00018317526789013425,
      "loss": 0.3837,
      "step": 688
    },
    {
      "epoch": 0.25443131462333823,
      "grad_norm": 0.2606697380542755,
      "learning_rate": 0.0001831506343145708,
      "loss": 0.3755,
      "step": 689
    },
    {
      "epoch": 0.2548005908419498,
      "grad_norm": 0.2648289203643799,
      "learning_rate": 0.00018312600073900728,
      "loss": 0.3259,
      "step": 690
    },
    {
      "epoch": 0.2551698670605613,
      "grad_norm": 0.2406310737133026,
      "learning_rate": 0.0001831013671634438,
      "loss": 0.2888,
      "step": 691
    },
    {
      "epoch": 0.2555391432791728,
      "grad_norm": 0.3420051336288452,
      "learning_rate": 0.00018307673358788028,
      "loss": 0.3411,
      "step": 692
    },
    {
      "epoch": 0.25590841949778437,
      "grad_norm": 0.2932198643684387,
      "learning_rate": 0.0001830521000123168,
      "loss": 0.3348,
      "step": 693
    },
    {
      "epoch": 0.25627769571639586,
      "grad_norm": 0.29080647230148315,
      "learning_rate": 0.0001830274664367533,
      "loss": 0.3892,
      "step": 694
    },
    {
      "epoch": 0.2566469719350074,
      "grad_norm": 0.32302191853523254,
      "learning_rate": 0.00018300283286118983,
      "loss": 0.3488,
      "step": 695
    },
    {
      "epoch": 0.2570162481536189,
      "grad_norm": 0.29372337460517883,
      "learning_rate": 0.0001829781992856263,
      "loss": 0.3834,
      "step": 696
    },
    {
      "epoch": 0.25738552437223045,
      "grad_norm": 0.2657610774040222,
      "learning_rate": 0.00018295356571006283,
      "loss": 0.2718,
      "step": 697
    },
    {
      "epoch": 0.25775480059084194,
      "grad_norm": 0.2769089639186859,
      "learning_rate": 0.00018292893213449934,
      "loss": 0.3309,
      "step": 698
    },
    {
      "epoch": 0.2581240768094535,
      "grad_norm": 0.3249312937259674,
      "learning_rate": 0.00018290429855893586,
      "loss": 0.4475,
      "step": 699
    },
    {
      "epoch": 0.258493353028065,
      "grad_norm": 0.28939583897590637,
      "learning_rate": 0.00018287966498337234,
      "loss": 0.3775,
      "step": 700
    },
    {
      "epoch": 0.258493353028065,
      "eval_loss": 0.34346112608909607,
      "eval_runtime": 5.8707,
      "eval_samples_per_second": 8.517,
      "eval_steps_per_second": 1.192,
      "step": 700
    },
    {
      "epoch": 0.2588626292466765,
      "grad_norm": 0.3047376871109009,
      "learning_rate": 0.00018285503140780883,
      "loss": 0.3566,
      "step": 701
    },
    {
      "epoch": 0.259231905465288,
      "grad_norm": 0.28955501317977905,
      "learning_rate": 0.00018283039783224535,
      "loss": 0.3964,
      "step": 702
    },
    {
      "epoch": 0.25960118168389956,
      "grad_norm": 0.28000709414482117,
      "learning_rate": 0.00018280576425668186,
      "loss": 0.3614,
      "step": 703
    },
    {
      "epoch": 0.25997045790251105,
      "grad_norm": 0.37217116355895996,
      "learning_rate": 0.00018278113068111838,
      "loss": 0.3892,
      "step": 704
    },
    {
      "epoch": 0.2603397341211226,
      "grad_norm": 0.2819831967353821,
      "learning_rate": 0.00018275649710555486,
      "loss": 0.3415,
      "step": 705
    },
    {
      "epoch": 0.26070901033973415,
      "grad_norm": 0.28753209114074707,
      "learning_rate": 0.00018273186352999138,
      "loss": 0.2532,
      "step": 706
    },
    {
      "epoch": 0.26107828655834564,
      "grad_norm": 0.33074823021888733,
      "learning_rate": 0.0001827072299544279,
      "loss": 0.3379,
      "step": 707
    },
    {
      "epoch": 0.2614475627769572,
      "grad_norm": 0.26290759444236755,
      "learning_rate": 0.0001826825963788644,
      "loss": 0.3852,
      "step": 708
    },
    {
      "epoch": 0.2618168389955687,
      "grad_norm": 0.281848281621933,
      "learning_rate": 0.0001826579628033009,
      "loss": 0.3215,
      "step": 709
    },
    {
      "epoch": 0.2621861152141802,
      "grad_norm": 0.27442625164985657,
      "learning_rate": 0.0001826333292277374,
      "loss": 0.3497,
      "step": 710
    },
    {
      "epoch": 0.2625553914327917,
      "grad_norm": 0.32699814438819885,
      "learning_rate": 0.00018260869565217392,
      "loss": 0.2898,
      "step": 711
    },
    {
      "epoch": 0.26292466765140327,
      "grad_norm": 0.2687341570854187,
      "learning_rate": 0.00018258406207661044,
      "loss": 0.2902,
      "step": 712
    },
    {
      "epoch": 0.26329394387001476,
      "grad_norm": 0.29069650173187256,
      "learning_rate": 0.00018255942850104693,
      "loss": 0.3481,
      "step": 713
    },
    {
      "epoch": 0.2636632200886263,
      "grad_norm": 0.28157898783683777,
      "learning_rate": 0.00018253479492548344,
      "loss": 0.3434,
      "step": 714
    },
    {
      "epoch": 0.2640324963072378,
      "grad_norm": 0.3608490824699402,
      "learning_rate": 0.00018251016134991993,
      "loss": 0.3482,
      "step": 715
    },
    {
      "epoch": 0.26440177252584934,
      "grad_norm": 0.25900065898895264,
      "learning_rate": 0.00018248552777435647,
      "loss": 0.3285,
      "step": 716
    },
    {
      "epoch": 0.26477104874446084,
      "grad_norm": 0.32958030700683594,
      "learning_rate": 0.00018246089419879296,
      "loss": 0.4435,
      "step": 717
    },
    {
      "epoch": 0.2651403249630724,
      "grad_norm": 0.26965761184692383,
      "learning_rate": 0.00018243626062322947,
      "loss": 0.3037,
      "step": 718
    },
    {
      "epoch": 0.2655096011816839,
      "grad_norm": 0.30884167551994324,
      "learning_rate": 0.00018241162704766596,
      "loss": 0.3645,
      "step": 719
    },
    {
      "epoch": 0.2658788774002954,
      "grad_norm": 0.3773479163646698,
      "learning_rate": 0.00018238699347210247,
      "loss": 0.3808,
      "step": 720
    },
    {
      "epoch": 0.26624815361890697,
      "grad_norm": 0.3207281231880188,
      "learning_rate": 0.000182362359896539,
      "loss": 0.3381,
      "step": 721
    },
    {
      "epoch": 0.26661742983751846,
      "grad_norm": 0.28987714648246765,
      "learning_rate": 0.0001823377263209755,
      "loss": 0.335,
      "step": 722
    },
    {
      "epoch": 0.26698670605613,
      "grad_norm": 0.2974066436290741,
      "learning_rate": 0.000182313092745412,
      "loss": 0.3933,
      "step": 723
    },
    {
      "epoch": 0.2673559822747415,
      "grad_norm": 0.3069154620170593,
      "learning_rate": 0.0001822884591698485,
      "loss": 0.4665,
      "step": 724
    },
    {
      "epoch": 0.26772525849335305,
      "grad_norm": 0.32729649543762207,
      "learning_rate": 0.00018226382559428502,
      "loss": 0.3923,
      "step": 725
    },
    {
      "epoch": 0.26809453471196454,
      "grad_norm": 0.3093666136264801,
      "learning_rate": 0.00018223919201872154,
      "loss": 0.4225,
      "step": 726
    },
    {
      "epoch": 0.2684638109305761,
      "grad_norm": 0.29534193873405457,
      "learning_rate": 0.00018221455844315802,
      "loss": 0.3724,
      "step": 727
    },
    {
      "epoch": 0.2688330871491876,
      "grad_norm": 0.27556416392326355,
      "learning_rate": 0.00018218992486759454,
      "loss": 0.3039,
      "step": 728
    },
    {
      "epoch": 0.2692023633677991,
      "grad_norm": 0.291860967874527,
      "learning_rate": 0.00018216529129203103,
      "loss": 0.3996,
      "step": 729
    },
    {
      "epoch": 0.2695716395864106,
      "grad_norm": 0.25631803274154663,
      "learning_rate": 0.00018214065771646757,
      "loss": 0.326,
      "step": 730
    },
    {
      "epoch": 0.26994091580502216,
      "grad_norm": 0.2968412935733795,
      "learning_rate": 0.00018211602414090405,
      "loss": 0.3103,
      "step": 731
    },
    {
      "epoch": 0.27031019202363366,
      "grad_norm": 0.2535908818244934,
      "learning_rate": 0.00018209139056534057,
      "loss": 0.293,
      "step": 732
    },
    {
      "epoch": 0.2706794682422452,
      "grad_norm": 0.2964654266834259,
      "learning_rate": 0.00018206675698977706,
      "loss": 0.3582,
      "step": 733
    },
    {
      "epoch": 0.2710487444608567,
      "grad_norm": 0.2881055772304535,
      "learning_rate": 0.00018204212341421357,
      "loss": 0.3766,
      "step": 734
    },
    {
      "epoch": 0.27141802067946824,
      "grad_norm": 0.3032318949699402,
      "learning_rate": 0.00018201748983865009,
      "loss": 0.3798,
      "step": 735
    },
    {
      "epoch": 0.2717872968980798,
      "grad_norm": 0.28385844826698303,
      "learning_rate": 0.0001819928562630866,
      "loss": 0.3321,
      "step": 736
    },
    {
      "epoch": 0.2721565731166913,
      "grad_norm": 0.3154700994491577,
      "learning_rate": 0.0001819682226875231,
      "loss": 0.3764,
      "step": 737
    },
    {
      "epoch": 0.2725258493353028,
      "grad_norm": 0.2877965271472931,
      "learning_rate": 0.0001819435891119596,
      "loss": 0.3417,
      "step": 738
    },
    {
      "epoch": 0.2728951255539143,
      "grad_norm": 0.33885276317596436,
      "learning_rate": 0.00018191895553639612,
      "loss": 0.4016,
      "step": 739
    },
    {
      "epoch": 0.27326440177252587,
      "grad_norm": 0.33902284502983093,
      "learning_rate": 0.00018189432196083263,
      "loss": 0.3702,
      "step": 740
    },
    {
      "epoch": 0.27363367799113736,
      "grad_norm": 0.3601471483707428,
      "learning_rate": 0.00018186968838526912,
      "loss": 0.3286,
      "step": 741
    },
    {
      "epoch": 0.2740029542097489,
      "grad_norm": 0.298261821269989,
      "learning_rate": 0.00018184505480970563,
      "loss": 0.3901,
      "step": 742
    },
    {
      "epoch": 0.2743722304283604,
      "grad_norm": 0.35861852765083313,
      "learning_rate": 0.00018182042123414215,
      "loss": 0.4522,
      "step": 743
    },
    {
      "epoch": 0.27474150664697194,
      "grad_norm": 0.31579142808914185,
      "learning_rate": 0.00018179578765857866,
      "loss": 0.4179,
      "step": 744
    },
    {
      "epoch": 0.27511078286558344,
      "grad_norm": 0.32154661417007446,
      "learning_rate": 0.00018177115408301515,
      "loss": 0.4405,
      "step": 745
    },
    {
      "epoch": 0.275480059084195,
      "grad_norm": 0.28148001432418823,
      "learning_rate": 0.00018174652050745167,
      "loss": 0.3703,
      "step": 746
    },
    {
      "epoch": 0.2758493353028065,
      "grad_norm": 0.3106623888015747,
      "learning_rate": 0.00018172188693188815,
      "loss": 0.3184,
      "step": 747
    },
    {
      "epoch": 0.276218611521418,
      "grad_norm": 0.32666391134262085,
      "learning_rate": 0.0001816972533563247,
      "loss": 0.3548,
      "step": 748
    },
    {
      "epoch": 0.2765878877400295,
      "grad_norm": 0.3134174942970276,
      "learning_rate": 0.00018167261978076118,
      "loss": 0.3829,
      "step": 749
    },
    {
      "epoch": 0.27695716395864106,
      "grad_norm": 0.33211180567741394,
      "learning_rate": 0.0001816479862051977,
      "loss": 0.3714,
      "step": 750
    },
    {
      "epoch": 0.27695716395864106,
      "eval_loss": 0.3437245190143585,
      "eval_runtime": 5.8628,
      "eval_samples_per_second": 8.528,
      "eval_steps_per_second": 1.194,
      "step": 750
    },
    {
      "epoch": 0.2773264401772526,
      "grad_norm": 0.2977505028247833,
      "learning_rate": 0.00018162335262963418,
      "loss": 0.3801,
      "step": 751
    },
    {
      "epoch": 0.2776957163958641,
      "grad_norm": 0.30672118067741394,
      "learning_rate": 0.0001815987190540707,
      "loss": 0.3891,
      "step": 752
    },
    {
      "epoch": 0.27806499261447565,
      "grad_norm": 0.22556626796722412,
      "learning_rate": 0.00018157408547850721,
      "loss": 0.2849,
      "step": 753
    },
    {
      "epoch": 0.27843426883308714,
      "grad_norm": 0.30615079402923584,
      "learning_rate": 0.00018154945190294373,
      "loss": 0.4246,
      "step": 754
    },
    {
      "epoch": 0.2788035450516987,
      "grad_norm": 0.2854730486869812,
      "learning_rate": 0.00018152481832738022,
      "loss": 0.3344,
      "step": 755
    },
    {
      "epoch": 0.2791728212703102,
      "grad_norm": 0.2818675935268402,
      "learning_rate": 0.00018150018475181673,
      "loss": 0.3566,
      "step": 756
    },
    {
      "epoch": 0.2795420974889217,
      "grad_norm": 0.27592697739601135,
      "learning_rate": 0.00018147555117625325,
      "loss": 0.3282,
      "step": 757
    },
    {
      "epoch": 0.2799113737075332,
      "grad_norm": 0.29141223430633545,
      "learning_rate": 0.00018145091760068976,
      "loss": 0.4406,
      "step": 758
    },
    {
      "epoch": 0.28028064992614476,
      "grad_norm": 0.31673353910446167,
      "learning_rate": 0.00018142628402512625,
      "loss": 0.4559,
      "step": 759
    },
    {
      "epoch": 0.28064992614475626,
      "grad_norm": 0.2683919370174408,
      "learning_rate": 0.00018140165044956276,
      "loss": 0.3207,
      "step": 760
    },
    {
      "epoch": 0.2810192023633678,
      "grad_norm": 0.3255873918533325,
      "learning_rate": 0.00018137701687399925,
      "loss": 0.3369,
      "step": 761
    },
    {
      "epoch": 0.2813884785819793,
      "grad_norm": 0.273697167634964,
      "learning_rate": 0.0001813523832984358,
      "loss": 0.3523,
      "step": 762
    },
    {
      "epoch": 0.28175775480059084,
      "grad_norm": 0.32450446486473083,
      "learning_rate": 0.00018132774972287228,
      "loss": 0.3772,
      "step": 763
    },
    {
      "epoch": 0.2821270310192024,
      "grad_norm": 0.27704933285713196,
      "learning_rate": 0.0001813031161473088,
      "loss": 0.3428,
      "step": 764
    },
    {
      "epoch": 0.2824963072378139,
      "grad_norm": 0.27535080909729004,
      "learning_rate": 0.00018127848257174528,
      "loss": 0.3595,
      "step": 765
    },
    {
      "epoch": 0.28286558345642543,
      "grad_norm": 0.2901923656463623,
      "learning_rate": 0.0001812538489961818,
      "loss": 0.3636,
      "step": 766
    },
    {
      "epoch": 0.2832348596750369,
      "grad_norm": 0.2620357573032379,
      "learning_rate": 0.0001812292154206183,
      "loss": 0.3531,
      "step": 767
    },
    {
      "epoch": 0.28360413589364847,
      "grad_norm": 0.534293532371521,
      "learning_rate": 0.00018120458184505482,
      "loss": 0.3886,
      "step": 768
    },
    {
      "epoch": 0.28397341211225996,
      "grad_norm": 0.24337033927440643,
      "learning_rate": 0.0001811799482694913,
      "loss": 0.2845,
      "step": 769
    },
    {
      "epoch": 0.2843426883308715,
      "grad_norm": 0.3237406611442566,
      "learning_rate": 0.00018115531469392783,
      "loss": 0.372,
      "step": 770
    },
    {
      "epoch": 0.284711964549483,
      "grad_norm": 0.29243141412734985,
      "learning_rate": 0.00018113068111836434,
      "loss": 0.3658,
      "step": 771
    },
    {
      "epoch": 0.28508124076809455,
      "grad_norm": 0.2752094268798828,
      "learning_rate": 0.00018110604754280086,
      "loss": 0.4016,
      "step": 772
    },
    {
      "epoch": 0.28545051698670604,
      "grad_norm": 0.29873543977737427,
      "learning_rate": 0.00018108141396723734,
      "loss": 0.4039,
      "step": 773
    },
    {
      "epoch": 0.2858197932053176,
      "grad_norm": 0.3566305637359619,
      "learning_rate": 0.00018105678039167386,
      "loss": 0.3922,
      "step": 774
    },
    {
      "epoch": 0.2861890694239291,
      "grad_norm": 0.24314096570014954,
      "learning_rate": 0.00018103214681611035,
      "loss": 0.2998,
      "step": 775
    },
    {
      "epoch": 0.2865583456425406,
      "grad_norm": 0.49944359064102173,
      "learning_rate": 0.0001810075132405469,
      "loss": 0.3827,
      "step": 776
    },
    {
      "epoch": 0.2869276218611521,
      "grad_norm": 0.3388037085533142,
      "learning_rate": 0.00018098287966498338,
      "loss": 0.4065,
      "step": 777
    },
    {
      "epoch": 0.28729689807976366,
      "grad_norm": 0.31323766708374023,
      "learning_rate": 0.0001809582460894199,
      "loss": 0.3669,
      "step": 778
    },
    {
      "epoch": 0.2876661742983752,
      "grad_norm": 0.3184468746185303,
      "learning_rate": 0.00018093361251385638,
      "loss": 0.3813,
      "step": 779
    },
    {
      "epoch": 0.2880354505169867,
      "grad_norm": 0.35029998421669006,
      "learning_rate": 0.0001809089789382929,
      "loss": 0.3933,
      "step": 780
    },
    {
      "epoch": 0.28840472673559825,
      "grad_norm": 0.28155091404914856,
      "learning_rate": 0.0001808843453627294,
      "loss": 0.3198,
      "step": 781
    },
    {
      "epoch": 0.28877400295420974,
      "grad_norm": 0.28609567880630493,
      "learning_rate": 0.00018085971178716592,
      "loss": 0.3188,
      "step": 782
    },
    {
      "epoch": 0.2891432791728213,
      "grad_norm": 0.3034074008464813,
      "learning_rate": 0.0001808350782116024,
      "loss": 0.4208,
      "step": 783
    },
    {
      "epoch": 0.2895125553914328,
      "grad_norm": 0.27176764607429504,
      "learning_rate": 0.00018081044463603892,
      "loss": 0.3128,
      "step": 784
    },
    {
      "epoch": 0.2898818316100443,
      "grad_norm": 0.42909035086631775,
      "learning_rate": 0.00018078581106047544,
      "loss": 0.4614,
      "step": 785
    },
    {
      "epoch": 0.2902511078286558,
      "grad_norm": 0.26615554094314575,
      "learning_rate": 0.00018076117748491195,
      "loss": 0.3133,
      "step": 786
    },
    {
      "epoch": 0.29062038404726737,
      "grad_norm": 0.28979459404945374,
      "learning_rate": 0.00018073654390934844,
      "loss": 0.3536,
      "step": 787
    },
    {
      "epoch": 0.29098966026587886,
      "grad_norm": 0.266000896692276,
      "learning_rate": 0.00018071191033378496,
      "loss": 0.3465,
      "step": 788
    },
    {
      "epoch": 0.2913589364844904,
      "grad_norm": 0.2925066351890564,
      "learning_rate": 0.00018068727675822147,
      "loss": 0.439,
      "step": 789
    },
    {
      "epoch": 0.2917282127031019,
      "grad_norm": 0.284578412771225,
      "learning_rate": 0.00018066264318265798,
      "loss": 0.3795,
      "step": 790
    },
    {
      "epoch": 0.29209748892171344,
      "grad_norm": 0.2803080379962921,
      "learning_rate": 0.00018063800960709447,
      "loss": 0.3622,
      "step": 791
    },
    {
      "epoch": 0.29246676514032494,
      "grad_norm": 0.2882033586502075,
      "learning_rate": 0.000180613376031531,
      "loss": 0.3518,
      "step": 792
    },
    {
      "epoch": 0.2928360413589365,
      "grad_norm": 0.2589658200740814,
      "learning_rate": 0.00018058874245596747,
      "loss": 0.3091,
      "step": 793
    },
    {
      "epoch": 0.29320531757754803,
      "grad_norm": 0.2796440124511719,
      "learning_rate": 0.00018056410888040402,
      "loss": 0.3395,
      "step": 794
    },
    {
      "epoch": 0.2935745937961595,
      "grad_norm": 0.2894507050514221,
      "learning_rate": 0.0001805394753048405,
      "loss": 0.3363,
      "step": 795
    },
    {
      "epoch": 0.29394387001477107,
      "grad_norm": 0.29944106936454773,
      "learning_rate": 0.00018051484172927702,
      "loss": 0.3681,
      "step": 796
    },
    {
      "epoch": 0.29431314623338256,
      "grad_norm": 0.26254385709762573,
      "learning_rate": 0.0001804902081537135,
      "loss": 0.299,
      "step": 797
    },
    {
      "epoch": 0.2946824224519941,
      "grad_norm": 0.28542569279670715,
      "learning_rate": 0.00018046557457815002,
      "loss": 0.3372,
      "step": 798
    },
    {
      "epoch": 0.2950516986706056,
      "grad_norm": 0.3604605495929718,
      "learning_rate": 0.00018044094100258653,
      "loss": 0.405,
      "step": 799
    },
    {
      "epoch": 0.29542097488921715,
      "grad_norm": 0.25007471442222595,
      "learning_rate": 0.00018041630742702305,
      "loss": 0.3783,
      "step": 800
    },
    {
      "epoch": 0.29542097488921715,
      "eval_loss": 0.3364347219467163,
      "eval_runtime": 5.8652,
      "eval_samples_per_second": 8.525,
      "eval_steps_per_second": 1.193,
      "step": 800
    },
    {
      "epoch": 0.29579025110782864,
      "grad_norm": 0.28190720081329346,
      "learning_rate": 0.00018039167385145954,
      "loss": 0.3105,
      "step": 801
    },
    {
      "epoch": 0.2961595273264402,
      "grad_norm": 0.2979622483253479,
      "learning_rate": 0.00018036704027589605,
      "loss": 0.3427,
      "step": 802
    },
    {
      "epoch": 0.2965288035450517,
      "grad_norm": 0.30916163325309753,
      "learning_rate": 0.00018034240670033257,
      "loss": 0.3733,
      "step": 803
    },
    {
      "epoch": 0.2968980797636632,
      "grad_norm": 0.31245294213294983,
      "learning_rate": 0.00018031777312476908,
      "loss": 0.3744,
      "step": 804
    },
    {
      "epoch": 0.2972673559822747,
      "grad_norm": 0.2838025987148285,
      "learning_rate": 0.00018029313954920557,
      "loss": 0.3682,
      "step": 805
    },
    {
      "epoch": 0.29763663220088626,
      "grad_norm": 0.3152095079421997,
      "learning_rate": 0.00018026850597364208,
      "loss": 0.4009,
      "step": 806
    },
    {
      "epoch": 0.2980059084194978,
      "grad_norm": 0.2876656651496887,
      "learning_rate": 0.00018024387239807857,
      "loss": 0.3572,
      "step": 807
    },
    {
      "epoch": 0.2983751846381093,
      "grad_norm": 0.31334713101387024,
      "learning_rate": 0.0001802192388225151,
      "loss": 0.4065,
      "step": 808
    },
    {
      "epoch": 0.29874446085672085,
      "grad_norm": 0.2907058298587799,
      "learning_rate": 0.0001801946052469516,
      "loss": 0.406,
      "step": 809
    },
    {
      "epoch": 0.29911373707533234,
      "grad_norm": 0.25115716457366943,
      "learning_rate": 0.00018016997167138811,
      "loss": 0.3331,
      "step": 810
    },
    {
      "epoch": 0.2994830132939439,
      "grad_norm": 0.2785030007362366,
      "learning_rate": 0.0001801453380958246,
      "loss": 0.3398,
      "step": 811
    },
    {
      "epoch": 0.2998522895125554,
      "grad_norm": 0.2514455020427704,
      "learning_rate": 0.00018012070452026112,
      "loss": 0.3256,
      "step": 812
    },
    {
      "epoch": 0.30022156573116693,
      "grad_norm": 0.2781217396259308,
      "learning_rate": 0.00018009607094469763,
      "loss": 0.3162,
      "step": 813
    },
    {
      "epoch": 0.3005908419497784,
      "grad_norm": 0.2895994484424591,
      "learning_rate": 0.00018007143736913415,
      "loss": 0.3805,
      "step": 814
    },
    {
      "epoch": 0.30096011816838997,
      "grad_norm": 0.2785027027130127,
      "learning_rate": 0.00018004680379357063,
      "loss": 0.2684,
      "step": 815
    },
    {
      "epoch": 0.30132939438700146,
      "grad_norm": 0.2652154862880707,
      "learning_rate": 0.00018002217021800715,
      "loss": 0.2917,
      "step": 816
    },
    {
      "epoch": 0.301698670605613,
      "grad_norm": 0.28336066007614136,
      "learning_rate": 0.00017999753664244366,
      "loss": 0.3594,
      "step": 817
    },
    {
      "epoch": 0.3020679468242245,
      "grad_norm": 0.34773701429367065,
      "learning_rate": 0.00017997290306688018,
      "loss": 0.4795,
      "step": 818
    },
    {
      "epoch": 0.30243722304283605,
      "grad_norm": 0.3035285174846649,
      "learning_rate": 0.00017994826949131666,
      "loss": 0.3498,
      "step": 819
    },
    {
      "epoch": 0.30280649926144754,
      "grad_norm": 0.31368377804756165,
      "learning_rate": 0.00017992363591575318,
      "loss": 0.3344,
      "step": 820
    },
    {
      "epoch": 0.3031757754800591,
      "grad_norm": 0.2675492763519287,
      "learning_rate": 0.0001798990023401897,
      "loss": 0.2946,
      "step": 821
    },
    {
      "epoch": 0.30354505169867063,
      "grad_norm": 0.242357075214386,
      "learning_rate": 0.0001798743687646262,
      "loss": 0.3179,
      "step": 822
    },
    {
      "epoch": 0.3039143279172821,
      "grad_norm": 0.32820671796798706,
      "learning_rate": 0.0001798497351890627,
      "loss": 0.3931,
      "step": 823
    },
    {
      "epoch": 0.30428360413589367,
      "grad_norm": 0.2815065085887909,
      "learning_rate": 0.0001798251016134992,
      "loss": 0.361,
      "step": 824
    },
    {
      "epoch": 0.30465288035450516,
      "grad_norm": 0.313423752784729,
      "learning_rate": 0.0001798004680379357,
      "loss": 0.38,
      "step": 825
    },
    {
      "epoch": 0.3050221565731167,
      "grad_norm": 0.3085949420928955,
      "learning_rate": 0.00017977583446237224,
      "loss": 0.4053,
      "step": 826
    },
    {
      "epoch": 0.3053914327917282,
      "grad_norm": 0.3582143783569336,
      "learning_rate": 0.00017975120088680873,
      "loss": 0.3648,
      "step": 827
    },
    {
      "epoch": 0.30576070901033975,
      "grad_norm": 0.2894408106803894,
      "learning_rate": 0.00017972656731124524,
      "loss": 0.3525,
      "step": 828
    },
    {
      "epoch": 0.30612998522895124,
      "grad_norm": 0.2721264362335205,
      "learning_rate": 0.00017970193373568173,
      "loss": 0.3277,
      "step": 829
    },
    {
      "epoch": 0.3064992614475628,
      "grad_norm": 0.2568438947200775,
      "learning_rate": 0.00017967730016011824,
      "loss": 0.3054,
      "step": 830
    },
    {
      "epoch": 0.3068685376661743,
      "grad_norm": 0.29634156823158264,
      "learning_rate": 0.00017965266658455476,
      "loss": 0.3478,
      "step": 831
    },
    {
      "epoch": 0.3072378138847858,
      "grad_norm": 0.3144989311695099,
      "learning_rate": 0.00017962803300899127,
      "loss": 0.3488,
      "step": 832
    },
    {
      "epoch": 0.3076070901033973,
      "grad_norm": 0.316948801279068,
      "learning_rate": 0.00017960339943342776,
      "loss": 0.3406,
      "step": 833
    },
    {
      "epoch": 0.30797636632200887,
      "grad_norm": 0.3339119851589203,
      "learning_rate": 0.00017957876585786428,
      "loss": 0.3856,
      "step": 834
    },
    {
      "epoch": 0.30834564254062036,
      "grad_norm": 0.3087283670902252,
      "learning_rate": 0.0001795541322823008,
      "loss": 0.3235,
      "step": 835
    },
    {
      "epoch": 0.3087149187592319,
      "grad_norm": 0.25156664848327637,
      "learning_rate": 0.0001795294987067373,
      "loss": 0.3036,
      "step": 836
    },
    {
      "epoch": 0.30908419497784345,
      "grad_norm": 0.25502482056617737,
      "learning_rate": 0.0001795048651311738,
      "loss": 0.3215,
      "step": 837
    },
    {
      "epoch": 0.30945347119645494,
      "grad_norm": 0.2689816355705261,
      "learning_rate": 0.0001794802315556103,
      "loss": 0.348,
      "step": 838
    },
    {
      "epoch": 0.3098227474150665,
      "grad_norm": 0.4214155972003937,
      "learning_rate": 0.0001794555979800468,
      "loss": 0.3808,
      "step": 839
    },
    {
      "epoch": 0.310192023633678,
      "grad_norm": 0.2970890998840332,
      "learning_rate": 0.00017943096440448334,
      "loss": 0.3542,
      "step": 840
    },
    {
      "epoch": 0.31056129985228953,
      "grad_norm": 0.23918575048446655,
      "learning_rate": 0.00017940633082891982,
      "loss": 0.2426,
      "step": 841
    },
    {
      "epoch": 0.310930576070901,
      "grad_norm": 0.28873178362846375,
      "learning_rate": 0.00017938169725335634,
      "loss": 0.4068,
      "step": 842
    },
    {
      "epoch": 0.31129985228951257,
      "grad_norm": 0.3294038474559784,
      "learning_rate": 0.00017935706367779283,
      "loss": 0.4282,
      "step": 843
    },
    {
      "epoch": 0.31166912850812406,
      "grad_norm": 0.28786158561706543,
      "learning_rate": 0.00017933243010222934,
      "loss": 0.3551,
      "step": 844
    },
    {
      "epoch": 0.3120384047267356,
      "grad_norm": 0.3132387697696686,
      "learning_rate": 0.00017930779652666586,
      "loss": 0.3127,
      "step": 845
    },
    {
      "epoch": 0.3124076809453471,
      "grad_norm": 0.25213518738746643,
      "learning_rate": 0.00017928316295110237,
      "loss": 0.2984,
      "step": 846
    },
    {
      "epoch": 0.31277695716395865,
      "grad_norm": 0.2759394347667694,
      "learning_rate": 0.00017925852937553886,
      "loss": 0.326,
      "step": 847
    },
    {
      "epoch": 0.31314623338257014,
      "grad_norm": 0.27484017610549927,
      "learning_rate": 0.00017923389579997537,
      "loss": 0.2922,
      "step": 848
    },
    {
      "epoch": 0.3135155096011817,
      "grad_norm": 0.2975594103336334,
      "learning_rate": 0.0001792092622244119,
      "loss": 0.3467,
      "step": 849
    },
    {
      "epoch": 0.31388478581979323,
      "grad_norm": 0.3532097339630127,
      "learning_rate": 0.0001791846286488484,
      "loss": 0.386,
      "step": 850
    },
    {
      "epoch": 0.31388478581979323,
      "eval_loss": 0.33392348885536194,
      "eval_runtime": 5.8583,
      "eval_samples_per_second": 8.535,
      "eval_steps_per_second": 1.195,
      "step": 850
    },
    {
      "epoch": 0.3142540620384047,
      "grad_norm": 0.24326710402965546,
      "learning_rate": 0.0001791599950732849,
      "loss": 0.2547,
      "step": 851
    },
    {
      "epoch": 0.31462333825701627,
      "grad_norm": 0.2544013261795044,
      "learning_rate": 0.0001791353614977214,
      "loss": 0.3396,
      "step": 852
    },
    {
      "epoch": 0.31499261447562776,
      "grad_norm": 0.3768573999404907,
      "learning_rate": 0.00017911072792215792,
      "loss": 0.3424,
      "step": 853
    },
    {
      "epoch": 0.3153618906942393,
      "grad_norm": 0.3502283990383148,
      "learning_rate": 0.00017908609434659443,
      "loss": 0.4177,
      "step": 854
    },
    {
      "epoch": 0.3157311669128508,
      "grad_norm": 0.29516372084617615,
      "learning_rate": 0.00017906146077103092,
      "loss": 0.4256,
      "step": 855
    },
    {
      "epoch": 0.31610044313146235,
      "grad_norm": 0.27422836422920227,
      "learning_rate": 0.00017903682719546744,
      "loss": 0.3216,
      "step": 856
    },
    {
      "epoch": 0.31646971935007384,
      "grad_norm": 0.2891975939273834,
      "learning_rate": 0.00017901219361990392,
      "loss": 0.2996,
      "step": 857
    },
    {
      "epoch": 0.3168389955686854,
      "grad_norm": 0.34916412830352783,
      "learning_rate": 0.00017898756004434046,
      "loss": 0.3324,
      "step": 858
    },
    {
      "epoch": 0.3172082717872969,
      "grad_norm": 0.35271620750427246,
      "learning_rate": 0.00017896292646877695,
      "loss": 0.405,
      "step": 859
    },
    {
      "epoch": 0.3175775480059084,
      "grad_norm": 0.251388818025589,
      "learning_rate": 0.00017893829289321347,
      "loss": 0.3203,
      "step": 860
    },
    {
      "epoch": 0.3179468242245199,
      "grad_norm": 0.2781042754650116,
      "learning_rate": 0.00017891365931764995,
      "loss": 0.3063,
      "step": 861
    },
    {
      "epoch": 0.31831610044313147,
      "grad_norm": 0.2542610168457031,
      "learning_rate": 0.00017888902574208647,
      "loss": 0.3106,
      "step": 862
    },
    {
      "epoch": 0.31868537666174296,
      "grad_norm": 0.2617054283618927,
      "learning_rate": 0.00017886439216652298,
      "loss": 0.2967,
      "step": 863
    },
    {
      "epoch": 0.3190546528803545,
      "grad_norm": 0.27979138493537903,
      "learning_rate": 0.0001788397585909595,
      "loss": 0.2823,
      "step": 864
    },
    {
      "epoch": 0.31942392909896605,
      "grad_norm": 0.23414112627506256,
      "learning_rate": 0.00017881512501539599,
      "loss": 0.2796,
      "step": 865
    },
    {
      "epoch": 0.31979320531757754,
      "grad_norm": 0.30112823843955994,
      "learning_rate": 0.0001787904914398325,
      "loss": 0.3338,
      "step": 866
    },
    {
      "epoch": 0.3201624815361891,
      "grad_norm": 0.3144896924495697,
      "learning_rate": 0.00017876585786426902,
      "loss": 0.3887,
      "step": 867
    },
    {
      "epoch": 0.3205317577548006,
      "grad_norm": 0.34166356921195984,
      "learning_rate": 0.00017874122428870553,
      "loss": 0.4369,
      "step": 868
    },
    {
      "epoch": 0.32090103397341213,
      "grad_norm": 0.2831858694553375,
      "learning_rate": 0.00017871659071314202,
      "loss": 0.2869,
      "step": 869
    },
    {
      "epoch": 0.3212703101920236,
      "grad_norm": 0.3323805332183838,
      "learning_rate": 0.00017869195713757853,
      "loss": 0.3173,
      "step": 870
    },
    {
      "epoch": 0.32163958641063517,
      "grad_norm": 0.27442899346351624,
      "learning_rate": 0.00017866732356201502,
      "loss": 0.2456,
      "step": 871
    },
    {
      "epoch": 0.32200886262924666,
      "grad_norm": 0.25058212876319885,
      "learning_rate": 0.00017864268998645156,
      "loss": 0.3411,
      "step": 872
    },
    {
      "epoch": 0.3223781388478582,
      "grad_norm": 0.24768081307411194,
      "learning_rate": 0.00017861805641088805,
      "loss": 0.3238,
      "step": 873
    },
    {
      "epoch": 0.3227474150664697,
      "grad_norm": 0.3337574005126953,
      "learning_rate": 0.00017859342283532456,
      "loss": 0.4814,
      "step": 874
    },
    {
      "epoch": 0.32311669128508125,
      "grad_norm": 0.2864474058151245,
      "learning_rate": 0.00017856878925976105,
      "loss": 0.3523,
      "step": 875
    },
    {
      "epoch": 0.32348596750369274,
      "grad_norm": 0.27543604373931885,
      "learning_rate": 0.00017854415568419757,
      "loss": 0.3394,
      "step": 876
    },
    {
      "epoch": 0.3238552437223043,
      "grad_norm": 0.2650769352912903,
      "learning_rate": 0.00017851952210863408,
      "loss": 0.3473,
      "step": 877
    },
    {
      "epoch": 0.3242245199409158,
      "grad_norm": 0.29300224781036377,
      "learning_rate": 0.0001784948885330706,
      "loss": 0.3884,
      "step": 878
    },
    {
      "epoch": 0.3245937961595273,
      "grad_norm": 0.3521723747253418,
      "learning_rate": 0.00017847025495750708,
      "loss": 0.419,
      "step": 879
    },
    {
      "epoch": 0.3249630723781389,
      "grad_norm": 0.28816041350364685,
      "learning_rate": 0.0001784456213819436,
      "loss": 0.3261,
      "step": 880
    },
    {
      "epoch": 0.32533234859675036,
      "grad_norm": 0.27399736642837524,
      "learning_rate": 0.0001784209878063801,
      "loss": 0.3483,
      "step": 881
    },
    {
      "epoch": 0.3257016248153619,
      "grad_norm": 0.31869786977767944,
      "learning_rate": 0.00017839635423081663,
      "loss": 0.4149,
      "step": 882
    },
    {
      "epoch": 0.3260709010339734,
      "grad_norm": 0.26290929317474365,
      "learning_rate": 0.00017837172065525311,
      "loss": 0.3341,
      "step": 883
    },
    {
      "epoch": 0.32644017725258495,
      "grad_norm": 0.278089702129364,
      "learning_rate": 0.00017834708707968963,
      "loss": 0.4107,
      "step": 884
    },
    {
      "epoch": 0.32680945347119644,
      "grad_norm": 0.31756216287612915,
      "learning_rate": 0.00017832245350412614,
      "loss": 0.4187,
      "step": 885
    },
    {
      "epoch": 0.327178729689808,
      "grad_norm": 0.3544643223285675,
      "learning_rate": 0.00017829781992856266,
      "loss": 0.4412,
      "step": 886
    },
    {
      "epoch": 0.3275480059084195,
      "grad_norm": 0.31643715500831604,
      "learning_rate": 0.00017827318635299915,
      "loss": 0.3393,
      "step": 887
    },
    {
      "epoch": 0.32791728212703103,
      "grad_norm": 0.30142152309417725,
      "learning_rate": 0.00017824855277743566,
      "loss": 0.4242,
      "step": 888
    },
    {
      "epoch": 0.3282865583456425,
      "grad_norm": 0.2803404629230499,
      "learning_rate": 0.00017822391920187215,
      "loss": 0.3514,
      "step": 889
    },
    {
      "epoch": 0.32865583456425407,
      "grad_norm": 0.3276580572128296,
      "learning_rate": 0.0001781992856263087,
      "loss": 0.4045,
      "step": 890
    },
    {
      "epoch": 0.32902511078286556,
      "grad_norm": 0.24273940920829773,
      "learning_rate": 0.00017817465205074518,
      "loss": 0.3047,
      "step": 891
    },
    {
      "epoch": 0.3293943870014771,
      "grad_norm": 0.2541992664337158,
      "learning_rate": 0.0001781500184751817,
      "loss": 0.3145,
      "step": 892
    },
    {
      "epoch": 0.3297636632200886,
      "grad_norm": 0.2850353419780731,
      "learning_rate": 0.00017812538489961818,
      "loss": 0.3145,
      "step": 893
    },
    {
      "epoch": 0.33013293943870015,
      "grad_norm": 0.2915472686290741,
      "learning_rate": 0.0001781007513240547,
      "loss": 0.3703,
      "step": 894
    },
    {
      "epoch": 0.3305022156573117,
      "grad_norm": 0.2595556080341339,
      "learning_rate": 0.0001780761177484912,
      "loss": 0.3154,
      "step": 895
    },
    {
      "epoch": 0.3308714918759232,
      "grad_norm": 0.3130100965499878,
      "learning_rate": 0.00017805148417292772,
      "loss": 0.3693,
      "step": 896
    },
    {
      "epoch": 0.33124076809453473,
      "grad_norm": 0.26682716608047485,
      "learning_rate": 0.0001780268505973642,
      "loss": 0.2852,
      "step": 897
    },
    {
      "epoch": 0.3316100443131462,
      "grad_norm": 0.33813703060150146,
      "learning_rate": 0.00017800221702180073,
      "loss": 0.4482,
      "step": 898
    },
    {
      "epoch": 0.33197932053175777,
      "grad_norm": 0.3224930465221405,
      "learning_rate": 0.00017797758344623724,
      "loss": 0.3355,
      "step": 899
    },
    {
      "epoch": 0.33234859675036926,
      "grad_norm": 0.3120392858982086,
      "learning_rate": 0.00017795294987067375,
      "loss": 0.3994,
      "step": 900
    },
    {
      "epoch": 0.33234859675036926,
      "eval_loss": 0.33280622959136963,
      "eval_runtime": 5.8567,
      "eval_samples_per_second": 8.537,
      "eval_steps_per_second": 1.195,
      "step": 900
    },
    {
      "epoch": 0.3327178729689808,
      "grad_norm": 0.2731114327907562,
      "learning_rate": 0.00017792831629511024,
      "loss": 0.28,
      "step": 901
    },
    {
      "epoch": 0.3330871491875923,
      "grad_norm": 0.3192294239997864,
      "learning_rate": 0.00017790368271954676,
      "loss": 0.4177,
      "step": 902
    },
    {
      "epoch": 0.33345642540620385,
      "grad_norm": 0.28603675961494446,
      "learning_rate": 0.00017787904914398324,
      "loss": 0.3673,
      "step": 903
    },
    {
      "epoch": 0.33382570162481534,
      "grad_norm": 0.2774650454521179,
      "learning_rate": 0.00017785441556841979,
      "loss": 0.2758,
      "step": 904
    },
    {
      "epoch": 0.3341949778434269,
      "grad_norm": 0.31270119547843933,
      "learning_rate": 0.00017782978199285627,
      "loss": 0.3327,
      "step": 905
    },
    {
      "epoch": 0.3345642540620384,
      "grad_norm": 0.3085302710533142,
      "learning_rate": 0.0001778051484172928,
      "loss": 0.3492,
      "step": 906
    },
    {
      "epoch": 0.3349335302806499,
      "grad_norm": 0.2787379026412964,
      "learning_rate": 0.00017778051484172928,
      "loss": 0.2969,
      "step": 907
    },
    {
      "epoch": 0.3353028064992615,
      "grad_norm": 0.28858089447021484,
      "learning_rate": 0.0001777558812661658,
      "loss": 0.3691,
      "step": 908
    },
    {
      "epoch": 0.33567208271787297,
      "grad_norm": 0.3041202127933502,
      "learning_rate": 0.0001777312476906023,
      "loss": 0.3389,
      "step": 909
    },
    {
      "epoch": 0.3360413589364845,
      "grad_norm": 0.28655505180358887,
      "learning_rate": 0.00017770661411503882,
      "loss": 0.3701,
      "step": 910
    },
    {
      "epoch": 0.336410635155096,
      "grad_norm": 0.2661409378051758,
      "learning_rate": 0.0001776819805394753,
      "loss": 0.3454,
      "step": 911
    },
    {
      "epoch": 0.33677991137370755,
      "grad_norm": 0.2793481647968292,
      "learning_rate": 0.00017765734696391182,
      "loss": 0.3738,
      "step": 912
    },
    {
      "epoch": 0.33714918759231904,
      "grad_norm": 0.2695358097553253,
      "learning_rate": 0.00017763271338834834,
      "loss": 0.352,
      "step": 913
    },
    {
      "epoch": 0.3375184638109306,
      "grad_norm": 0.3453199863433838,
      "learning_rate": 0.00017760807981278485,
      "loss": 0.3278,
      "step": 914
    },
    {
      "epoch": 0.3378877400295421,
      "grad_norm": 0.2923884093761444,
      "learning_rate": 0.00017758344623722134,
      "loss": 0.368,
      "step": 915
    },
    {
      "epoch": 0.33825701624815363,
      "grad_norm": 0.3048684895038605,
      "learning_rate": 0.00017755881266165785,
      "loss": 0.3263,
      "step": 916
    },
    {
      "epoch": 0.3386262924667651,
      "grad_norm": 0.27479395270347595,
      "learning_rate": 0.00017753417908609437,
      "loss": 0.3459,
      "step": 917
    },
    {
      "epoch": 0.33899556868537667,
      "grad_norm": 0.34671294689178467,
      "learning_rate": 0.00017750954551053088,
      "loss": 0.3735,
      "step": 918
    },
    {
      "epoch": 0.33936484490398816,
      "grad_norm": 0.31332647800445557,
      "learning_rate": 0.00017748491193496737,
      "loss": 0.3626,
      "step": 919
    },
    {
      "epoch": 0.3397341211225997,
      "grad_norm": 0.23130548000335693,
      "learning_rate": 0.00017746027835940388,
      "loss": 0.3068,
      "step": 920
    },
    {
      "epoch": 0.3401033973412112,
      "grad_norm": 0.3047637939453125,
      "learning_rate": 0.00017743564478384037,
      "loss": 0.3439,
      "step": 921
    },
    {
      "epoch": 0.34047267355982275,
      "grad_norm": 0.33907750248908997,
      "learning_rate": 0.00017741101120827691,
      "loss": 0.3617,
      "step": 922
    },
    {
      "epoch": 0.3408419497784343,
      "grad_norm": 0.32694822549819946,
      "learning_rate": 0.0001773863776327134,
      "loss": 0.3503,
      "step": 923
    },
    {
      "epoch": 0.3412112259970458,
      "grad_norm": 0.27101773023605347,
      "learning_rate": 0.00017736174405714992,
      "loss": 0.3474,
      "step": 924
    },
    {
      "epoch": 0.34158050221565733,
      "grad_norm": 0.2639794945716858,
      "learning_rate": 0.0001773371104815864,
      "loss": 0.3078,
      "step": 925
    },
    {
      "epoch": 0.3419497784342688,
      "grad_norm": 0.2643485367298126,
      "learning_rate": 0.00017731247690602292,
      "loss": 0.2704,
      "step": 926
    },
    {
      "epoch": 0.3423190546528804,
      "grad_norm": 0.2682543992996216,
      "learning_rate": 0.00017728784333045943,
      "loss": 0.365,
      "step": 927
    },
    {
      "epoch": 0.34268833087149186,
      "grad_norm": 0.2998700439929962,
      "learning_rate": 0.00017726320975489595,
      "loss": 0.3521,
      "step": 928
    },
    {
      "epoch": 0.3430576070901034,
      "grad_norm": 0.3435867726802826,
      "learning_rate": 0.00017723857617933244,
      "loss": 0.3718,
      "step": 929
    },
    {
      "epoch": 0.3434268833087149,
      "grad_norm": 0.30668380856513977,
      "learning_rate": 0.00017721394260376895,
      "loss": 0.389,
      "step": 930
    },
    {
      "epoch": 0.34379615952732645,
      "grad_norm": 0.33005931973457336,
      "learning_rate": 0.00017718930902820546,
      "loss": 0.4633,
      "step": 931
    },
    {
      "epoch": 0.34416543574593794,
      "grad_norm": 0.3313938081264496,
      "learning_rate": 0.00017716467545264198,
      "loss": 0.4582,
      "step": 932
    },
    {
      "epoch": 0.3445347119645495,
      "grad_norm": 0.27533599734306335,
      "learning_rate": 0.00017714004187707847,
      "loss": 0.3448,
      "step": 933
    },
    {
      "epoch": 0.344903988183161,
      "grad_norm": 0.31112176179885864,
      "learning_rate": 0.00017711540830151495,
      "loss": 0.3978,
      "step": 934
    },
    {
      "epoch": 0.34527326440177253,
      "grad_norm": 0.30256086587905884,
      "learning_rate": 0.00017709077472595147,
      "loss": 0.3801,
      "step": 935
    },
    {
      "epoch": 0.345642540620384,
      "grad_norm": 0.2602297365665436,
      "learning_rate": 0.00017706614115038798,
      "loss": 0.3414,
      "step": 936
    },
    {
      "epoch": 0.34601181683899557,
      "grad_norm": 0.23241616785526276,
      "learning_rate": 0.0001770415075748245,
      "loss": 0.3051,
      "step": 937
    },
    {
      "epoch": 0.3463810930576071,
      "grad_norm": 0.2802417576313019,
      "learning_rate": 0.00017701687399926099,
      "loss": 0.2978,
      "step": 938
    },
    {
      "epoch": 0.3467503692762186,
      "grad_norm": 0.2938059866428375,
      "learning_rate": 0.0001769922404236975,
      "loss": 0.3361,
      "step": 939
    },
    {
      "epoch": 0.34711964549483015,
      "grad_norm": 0.2498949021100998,
      "learning_rate": 0.00017696760684813401,
      "loss": 0.3115,
      "step": 940
    },
    {
      "epoch": 0.34748892171344165,
      "grad_norm": 0.2669752240180969,
      "learning_rate": 0.00017694297327257053,
      "loss": 0.288,
      "step": 941
    },
    {
      "epoch": 0.3478581979320532,
      "grad_norm": 0.29485201835632324,
      "learning_rate": 0.00017691833969700702,
      "loss": 0.2898,
      "step": 942
    },
    {
      "epoch": 0.3482274741506647,
      "grad_norm": 0.28879061341285706,
      "learning_rate": 0.00017689370612144353,
      "loss": 0.311,
      "step": 943
    },
    {
      "epoch": 0.34859675036927623,
      "grad_norm": 0.25666582584381104,
      "learning_rate": 0.00017686907254588002,
      "loss": 0.3279,
      "step": 944
    },
    {
      "epoch": 0.3489660265878877,
      "grad_norm": 0.2344098687171936,
      "learning_rate": 0.00017684443897031656,
      "loss": 0.307,
      "step": 945
    },
    {
      "epoch": 0.34933530280649927,
      "grad_norm": 0.26992878317832947,
      "learning_rate": 0.00017681980539475305,
      "loss": 0.3829,
      "step": 946
    },
    {
      "epoch": 0.34970457902511076,
      "grad_norm": 0.28339770436286926,
      "learning_rate": 0.00017679517181918956,
      "loss": 0.3002,
      "step": 947
    },
    {
      "epoch": 0.3500738552437223,
      "grad_norm": 0.2965022921562195,
      "learning_rate": 0.00017677053824362605,
      "loss": 0.3751,
      "step": 948
    },
    {
      "epoch": 0.3504431314623338,
      "grad_norm": 0.2937091588973999,
      "learning_rate": 0.00017674590466806257,
      "loss": 0.3755,
      "step": 949
    },
    {
      "epoch": 0.35081240768094535,
      "grad_norm": 0.23199358582496643,
      "learning_rate": 0.00017672127109249908,
      "loss": 0.2383,
      "step": 950
    },
    {
      "epoch": 0.35081240768094535,
      "eval_loss": 0.3347838222980499,
      "eval_runtime": 5.8517,
      "eval_samples_per_second": 8.545,
      "eval_steps_per_second": 1.196,
      "step": 950
    },
    {
      "epoch": 0.3511816838995569,
      "grad_norm": 0.23240543901920319,
      "learning_rate": 0.0001766966375169356,
      "loss": 0.2812,
      "step": 951
    },
    {
      "epoch": 0.3515509601181684,
      "grad_norm": 0.27188020944595337,
      "learning_rate": 0.00017667200394137208,
      "loss": 0.3252,
      "step": 952
    },
    {
      "epoch": 0.35192023633677993,
      "grad_norm": 0.30740195512771606,
      "learning_rate": 0.0001766473703658086,
      "loss": 0.3731,
      "step": 953
    },
    {
      "epoch": 0.3522895125553914,
      "grad_norm": 0.27258527278900146,
      "learning_rate": 0.0001766227367902451,
      "loss": 0.2974,
      "step": 954
    },
    {
      "epoch": 0.352658788774003,
      "grad_norm": 0.2678452134132385,
      "learning_rate": 0.00017659810321468163,
      "loss": 0.2672,
      "step": 955
    },
    {
      "epoch": 0.35302806499261447,
      "grad_norm": 0.2737193703651428,
      "learning_rate": 0.0001765734696391181,
      "loss": 0.3152,
      "step": 956
    },
    {
      "epoch": 0.353397341211226,
      "grad_norm": 0.37009337544441223,
      "learning_rate": 0.00017654883606355463,
      "loss": 0.3848,
      "step": 957
    },
    {
      "epoch": 0.3537666174298375,
      "grad_norm": 0.272429496049881,
      "learning_rate": 0.00017652420248799114,
      "loss": 0.3315,
      "step": 958
    },
    {
      "epoch": 0.35413589364844905,
      "grad_norm": 0.2654068171977997,
      "learning_rate": 0.00017649956891242766,
      "loss": 0.3273,
      "step": 959
    },
    {
      "epoch": 0.35450516986706054,
      "grad_norm": 0.2581124007701874,
      "learning_rate": 0.00017647493533686414,
      "loss": 0.3332,
      "step": 960
    },
    {
      "epoch": 0.3548744460856721,
      "grad_norm": 0.38511034846305847,
      "learning_rate": 0.00017645030176130066,
      "loss": 0.3301,
      "step": 961
    },
    {
      "epoch": 0.3552437223042836,
      "grad_norm": 0.2135087251663208,
      "learning_rate": 0.00017642566818573715,
      "loss": 0.2557,
      "step": 962
    },
    {
      "epoch": 0.35561299852289513,
      "grad_norm": 0.23889155685901642,
      "learning_rate": 0.0001764010346101737,
      "loss": 0.2628,
      "step": 963
    },
    {
      "epoch": 0.3559822747415066,
      "grad_norm": 0.3801189064979553,
      "learning_rate": 0.00017637640103461018,
      "loss": 0.4222,
      "step": 964
    },
    {
      "epoch": 0.35635155096011817,
      "grad_norm": 0.2972833514213562,
      "learning_rate": 0.0001763517674590467,
      "loss": 0.3273,
      "step": 965
    },
    {
      "epoch": 0.3567208271787297,
      "grad_norm": 0.2821199893951416,
      "learning_rate": 0.00017632713388348318,
      "loss": 0.3254,
      "step": 966
    },
    {
      "epoch": 0.3570901033973412,
      "grad_norm": 0.24031786620616913,
      "learning_rate": 0.0001763025003079197,
      "loss": 0.2832,
      "step": 967
    },
    {
      "epoch": 0.35745937961595275,
      "grad_norm": 0.3090129494667053,
      "learning_rate": 0.0001762778667323562,
      "loss": 0.3797,
      "step": 968
    },
    {
      "epoch": 0.35782865583456425,
      "grad_norm": 0.2988395392894745,
      "learning_rate": 0.00017625323315679272,
      "loss": 0.2869,
      "step": 969
    },
    {
      "epoch": 0.3581979320531758,
      "grad_norm": 0.2711489498615265,
      "learning_rate": 0.0001762285995812292,
      "loss": 0.3346,
      "step": 970
    },
    {
      "epoch": 0.3585672082717873,
      "grad_norm": 0.2808724343776703,
      "learning_rate": 0.00017620396600566572,
      "loss": 0.3403,
      "step": 971
    },
    {
      "epoch": 0.35893648449039883,
      "grad_norm": 0.2992192804813385,
      "learning_rate": 0.00017617933243010224,
      "loss": 0.3174,
      "step": 972
    },
    {
      "epoch": 0.3593057607090103,
      "grad_norm": 0.3064062297344208,
      "learning_rate": 0.00017615469885453875,
      "loss": 0.3739,
      "step": 973
    },
    {
      "epoch": 0.35967503692762187,
      "grad_norm": 0.26694345474243164,
      "learning_rate": 0.00017613006527897524,
      "loss": 0.306,
      "step": 974
    },
    {
      "epoch": 0.36004431314623336,
      "grad_norm": 0.32117223739624023,
      "learning_rate": 0.00017610543170341176,
      "loss": 0.4091,
      "step": 975
    },
    {
      "epoch": 0.3604135893648449,
      "grad_norm": 0.2756253480911255,
      "learning_rate": 0.00017608079812784824,
      "loss": 0.3541,
      "step": 976
    },
    {
      "epoch": 0.3607828655834564,
      "grad_norm": 0.333671897649765,
      "learning_rate": 0.00017605616455228479,
      "loss": 0.3691,
      "step": 977
    },
    {
      "epoch": 0.36115214180206795,
      "grad_norm": 0.34428882598876953,
      "learning_rate": 0.00017603153097672127,
      "loss": 0.394,
      "step": 978
    },
    {
      "epoch": 0.36152141802067944,
      "grad_norm": 0.2882046401500702,
      "learning_rate": 0.0001760068974011578,
      "loss": 0.3056,
      "step": 979
    },
    {
      "epoch": 0.361890694239291,
      "grad_norm": 0.2911432683467865,
      "learning_rate": 0.00017598226382559428,
      "loss": 0.3761,
      "step": 980
    },
    {
      "epoch": 0.36225997045790254,
      "grad_norm": 0.29542505741119385,
      "learning_rate": 0.0001759576302500308,
      "loss": 0.3443,
      "step": 981
    },
    {
      "epoch": 0.362629246676514,
      "grad_norm": 0.2800205647945404,
      "learning_rate": 0.0001759329966744673,
      "loss": 0.3417,
      "step": 982
    },
    {
      "epoch": 0.3629985228951256,
      "grad_norm": 0.2971092462539673,
      "learning_rate": 0.00017590836309890382,
      "loss": 0.3583,
      "step": 983
    },
    {
      "epoch": 0.36336779911373707,
      "grad_norm": 0.24899259209632874,
      "learning_rate": 0.0001758837295233403,
      "loss": 0.3214,
      "step": 984
    },
    {
      "epoch": 0.3637370753323486,
      "grad_norm": 0.3360452651977539,
      "learning_rate": 0.00017585909594777682,
      "loss": 0.4322,
      "step": 985
    },
    {
      "epoch": 0.3641063515509601,
      "grad_norm": 0.2182115614414215,
      "learning_rate": 0.00017583446237221334,
      "loss": 0.2556,
      "step": 986
    },
    {
      "epoch": 0.36447562776957165,
      "grad_norm": 0.2815421223640442,
      "learning_rate": 0.00017580982879664985,
      "loss": 0.3169,
      "step": 987
    },
    {
      "epoch": 0.36484490398818314,
      "grad_norm": 0.25160837173461914,
      "learning_rate": 0.00017578519522108634,
      "loss": 0.3387,
      "step": 988
    },
    {
      "epoch": 0.3652141802067947,
      "grad_norm": 0.2743687331676483,
      "learning_rate": 0.00017576056164552285,
      "loss": 0.3412,
      "step": 989
    },
    {
      "epoch": 0.3655834564254062,
      "grad_norm": 0.27276766300201416,
      "learning_rate": 0.00017573592806995937,
      "loss": 0.3259,
      "step": 990
    },
    {
      "epoch": 0.36595273264401773,
      "grad_norm": 0.25966206192970276,
      "learning_rate": 0.00017571129449439588,
      "loss": 0.3491,
      "step": 991
    },
    {
      "epoch": 0.3663220088626292,
      "grad_norm": 0.2842404544353485,
      "learning_rate": 0.00017568666091883237,
      "loss": 0.3525,
      "step": 992
    },
    {
      "epoch": 0.36669128508124077,
      "grad_norm": 0.26605677604675293,
      "learning_rate": 0.00017566202734326888,
      "loss": 0.3481,
      "step": 993
    },
    {
      "epoch": 0.3670605612998523,
      "grad_norm": 0.30087584257125854,
      "learning_rate": 0.00017563739376770537,
      "loss": 0.3134,
      "step": 994
    },
    {
      "epoch": 0.3674298375184638,
      "grad_norm": 0.38111740350723267,
      "learning_rate": 0.0001756127601921419,
      "loss": 0.3858,
      "step": 995
    },
    {
      "epoch": 0.36779911373707536,
      "grad_norm": 0.3130910098552704,
      "learning_rate": 0.0001755881266165784,
      "loss": 0.3217,
      "step": 996
    },
    {
      "epoch": 0.36816838995568685,
      "grad_norm": 0.24693329632282257,
      "learning_rate": 0.00017556349304101492,
      "loss": 0.3074,
      "step": 997
    },
    {
      "epoch": 0.3685376661742984,
      "grad_norm": 0.2819662094116211,
      "learning_rate": 0.0001755388594654514,
      "loss": 0.3732,
      "step": 998
    },
    {
      "epoch": 0.3689069423929099,
      "grad_norm": 0.28971341252326965,
      "learning_rate": 0.00017551422588988792,
      "loss": 0.3584,
      "step": 999
    },
    {
      "epoch": 0.36927621861152143,
      "grad_norm": 0.3545287847518921,
      "learning_rate": 0.00017548959231432443,
      "loss": 0.4349,
      "step": 1000
    },
    {
      "epoch": 0.36927621861152143,
      "eval_loss": 0.33093270659446716,
      "eval_runtime": 5.854,
      "eval_samples_per_second": 8.541,
      "eval_steps_per_second": 1.196,
      "step": 1000
    },
    {
      "epoch": 0.3696454948301329,
      "grad_norm": 0.9900842905044556,
      "learning_rate": 0.00017546495873876095,
      "loss": 0.3643,
      "step": 1001
    },
    {
      "epoch": 0.3700147710487445,
      "grad_norm": 0.34718453884124756,
      "learning_rate": 0.00017544032516319743,
      "loss": 0.36,
      "step": 1002
    },
    {
      "epoch": 0.37038404726735596,
      "grad_norm": 0.27414506673812866,
      "learning_rate": 0.00017541569158763395,
      "loss": 0.3202,
      "step": 1003
    },
    {
      "epoch": 0.3707533234859675,
      "grad_norm": 0.2744106650352478,
      "learning_rate": 0.00017539105801207046,
      "loss": 0.3782,
      "step": 1004
    },
    {
      "epoch": 0.371122599704579,
      "grad_norm": 0.23309417068958282,
      "learning_rate": 0.00017536642443650698,
      "loss": 0.2805,
      "step": 1005
    },
    {
      "epoch": 0.37149187592319055,
      "grad_norm": 0.2333519160747528,
      "learning_rate": 0.00017534179086094347,
      "loss": 0.288,
      "step": 1006
    },
    {
      "epoch": 0.37186115214180204,
      "grad_norm": 0.2939945459365845,
      "learning_rate": 0.00017531715728537998,
      "loss": 0.4267,
      "step": 1007
    },
    {
      "epoch": 0.3722304283604136,
      "grad_norm": 0.2831266224384308,
      "learning_rate": 0.00017529252370981647,
      "loss": 0.3193,
      "step": 1008
    },
    {
      "epoch": 0.37259970457902514,
      "grad_norm": 0.3656313419342041,
      "learning_rate": 0.000175267890134253,
      "loss": 0.3855,
      "step": 1009
    },
    {
      "epoch": 0.37296898079763663,
      "grad_norm": 0.25398287177085876,
      "learning_rate": 0.0001752432565586895,
      "loss": 0.2697,
      "step": 1010
    },
    {
      "epoch": 0.3733382570162482,
      "grad_norm": 0.2820013165473938,
      "learning_rate": 0.000175218622983126,
      "loss": 0.3384,
      "step": 1011
    },
    {
      "epoch": 0.37370753323485967,
      "grad_norm": 0.27855661511421204,
      "learning_rate": 0.0001751939894075625,
      "loss": 0.3441,
      "step": 1012
    },
    {
      "epoch": 0.3740768094534712,
      "grad_norm": 0.33842533826828003,
      "learning_rate": 0.00017516935583199901,
      "loss": 0.3638,
      "step": 1013
    },
    {
      "epoch": 0.3744460856720827,
      "grad_norm": 0.418399453163147,
      "learning_rate": 0.00017514472225643553,
      "loss": 0.38,
      "step": 1014
    },
    {
      "epoch": 0.37481536189069425,
      "grad_norm": 0.29497411847114563,
      "learning_rate": 0.00017512008868087204,
      "loss": 0.3665,
      "step": 1015
    },
    {
      "epoch": 0.37518463810930575,
      "grad_norm": 0.3119269907474518,
      "learning_rate": 0.00017509545510530853,
      "loss": 0.3562,
      "step": 1016
    },
    {
      "epoch": 0.3755539143279173,
      "grad_norm": 0.3685709238052368,
      "learning_rate": 0.00017507082152974505,
      "loss": 0.3094,
      "step": 1017
    },
    {
      "epoch": 0.3759231905465288,
      "grad_norm": 0.31541427969932556,
      "learning_rate": 0.00017504618795418156,
      "loss": 0.4056,
      "step": 1018
    },
    {
      "epoch": 0.37629246676514033,
      "grad_norm": 0.28675368428230286,
      "learning_rate": 0.00017502155437861808,
      "loss": 0.305,
      "step": 1019
    },
    {
      "epoch": 0.3766617429837518,
      "grad_norm": 0.2524102032184601,
      "learning_rate": 0.00017499692080305456,
      "loss": 0.268,
      "step": 1020
    },
    {
      "epoch": 0.37703101920236337,
      "grad_norm": 0.3210057318210602,
      "learning_rate": 0.00017497228722749108,
      "loss": 0.4261,
      "step": 1021
    },
    {
      "epoch": 0.37740029542097486,
      "grad_norm": 0.24090994894504547,
      "learning_rate": 0.0001749476536519276,
      "loss": 0.2977,
      "step": 1022
    },
    {
      "epoch": 0.3777695716395864,
      "grad_norm": 0.384414404630661,
      "learning_rate": 0.0001749230200763641,
      "loss": 0.3994,
      "step": 1023
    },
    {
      "epoch": 0.37813884785819796,
      "grad_norm": 0.31013649702072144,
      "learning_rate": 0.0001748983865008006,
      "loss": 0.3856,
      "step": 1024
    },
    {
      "epoch": 0.37850812407680945,
      "grad_norm": 0.24665279686450958,
      "learning_rate": 0.0001748737529252371,
      "loss": 0.2265,
      "step": 1025
    },
    {
      "epoch": 0.378877400295421,
      "grad_norm": 0.4522167146205902,
      "learning_rate": 0.0001748491193496736,
      "loss": 0.3952,
      "step": 1026
    },
    {
      "epoch": 0.3792466765140325,
      "grad_norm": 0.270622581243515,
      "learning_rate": 0.00017482448577411014,
      "loss": 0.3162,
      "step": 1027
    },
    {
      "epoch": 0.37961595273264404,
      "grad_norm": 0.32009977102279663,
      "learning_rate": 0.00017479985219854663,
      "loss": 0.2769,
      "step": 1028
    },
    {
      "epoch": 0.3799852289512555,
      "grad_norm": 0.2599954903125763,
      "learning_rate": 0.00017477521862298314,
      "loss": 0.3331,
      "step": 1029
    },
    {
      "epoch": 0.3803545051698671,
      "grad_norm": 0.29672732949256897,
      "learning_rate": 0.00017475058504741963,
      "loss": 0.3365,
      "step": 1030
    },
    {
      "epoch": 0.38072378138847857,
      "grad_norm": 0.37659427523612976,
      "learning_rate": 0.00017472595147185614,
      "loss": 0.2988,
      "step": 1031
    },
    {
      "epoch": 0.3810930576070901,
      "grad_norm": 0.25498005747795105,
      "learning_rate": 0.00017470131789629266,
      "loss": 0.2885,
      "step": 1032
    },
    {
      "epoch": 0.3814623338257016,
      "grad_norm": 0.28253865242004395,
      "learning_rate": 0.00017467668432072917,
      "loss": 0.3096,
      "step": 1033
    },
    {
      "epoch": 0.38183161004431315,
      "grad_norm": 0.24322527647018433,
      "learning_rate": 0.00017465205074516566,
      "loss": 0.2841,
      "step": 1034
    },
    {
      "epoch": 0.38220088626292464,
      "grad_norm": 0.2777494192123413,
      "learning_rate": 0.00017462741716960217,
      "loss": 0.3519,
      "step": 1035
    },
    {
      "epoch": 0.3825701624815362,
      "grad_norm": 0.27474284172058105,
      "learning_rate": 0.0001746027835940387,
      "loss": 0.2772,
      "step": 1036
    },
    {
      "epoch": 0.3829394387001477,
      "grad_norm": 0.29572758078575134,
      "learning_rate": 0.0001745781500184752,
      "loss": 0.2974,
      "step": 1037
    },
    {
      "epoch": 0.38330871491875923,
      "grad_norm": 0.3762396275997162,
      "learning_rate": 0.0001745535164429117,
      "loss": 0.3297,
      "step": 1038
    },
    {
      "epoch": 0.3836779911373708,
      "grad_norm": 0.28851041197776794,
      "learning_rate": 0.0001745288828673482,
      "loss": 0.3641,
      "step": 1039
    },
    {
      "epoch": 0.38404726735598227,
      "grad_norm": 0.281868577003479,
      "learning_rate": 0.0001745042492917847,
      "loss": 0.342,
      "step": 1040
    },
    {
      "epoch": 0.3844165435745938,
      "grad_norm": 0.2661024332046509,
      "learning_rate": 0.00017447961571622123,
      "loss": 0.3789,
      "step": 1041
    },
    {
      "epoch": 0.3847858197932053,
      "grad_norm": 0.2997417449951172,
      "learning_rate": 0.00017445498214065772,
      "loss": 0.368,
      "step": 1042
    },
    {
      "epoch": 0.38515509601181686,
      "grad_norm": 0.3276411294937134,
      "learning_rate": 0.00017443034856509424,
      "loss": 0.3105,
      "step": 1043
    },
    {
      "epoch": 0.38552437223042835,
      "grad_norm": 0.3410065472126007,
      "learning_rate": 0.00017440571498953072,
      "loss": 0.3864,
      "step": 1044
    },
    {
      "epoch": 0.3858936484490399,
      "grad_norm": 0.30084607005119324,
      "learning_rate": 0.00017438108141396724,
      "loss": 0.309,
      "step": 1045
    },
    {
      "epoch": 0.3862629246676514,
      "grad_norm": 0.3507026433944702,
      "learning_rate": 0.00017435644783840375,
      "loss": 0.3668,
      "step": 1046
    },
    {
      "epoch": 0.38663220088626293,
      "grad_norm": 0.3287352919578552,
      "learning_rate": 0.00017433181426284027,
      "loss": 0.4588,
      "step": 1047
    },
    {
      "epoch": 0.3870014771048744,
      "grad_norm": 0.31868648529052734,
      "learning_rate": 0.00017430718068727676,
      "loss": 0.3658,
      "step": 1048
    },
    {
      "epoch": 0.387370753323486,
      "grad_norm": 0.29755568504333496,
      "learning_rate": 0.00017428254711171327,
      "loss": 0.3829,
      "step": 1049
    },
    {
      "epoch": 0.38774002954209746,
      "grad_norm": 0.2674624025821686,
      "learning_rate": 0.00017425791353614978,
      "loss": 0.3566,
      "step": 1050
    },
    {
      "epoch": 0.38774002954209746,
      "eval_loss": 0.3311212658882141,
      "eval_runtime": 5.8514,
      "eval_samples_per_second": 8.545,
      "eval_steps_per_second": 1.196,
      "step": 1050
    },
    {
      "epoch": 0.388109305760709,
      "grad_norm": 0.29503870010375977,
      "learning_rate": 0.0001742332799605863,
      "loss": 0.3396,
      "step": 1051
    },
    {
      "epoch": 0.38847858197932056,
      "grad_norm": 0.30640003085136414,
      "learning_rate": 0.0001742086463850228,
      "loss": 0.3289,
      "step": 1052
    },
    {
      "epoch": 0.38884785819793205,
      "grad_norm": 0.3086230158805847,
      "learning_rate": 0.0001741840128094593,
      "loss": 0.3619,
      "step": 1053
    },
    {
      "epoch": 0.3892171344165436,
      "grad_norm": 0.3033970892429352,
      "learning_rate": 0.00017415937923389582,
      "loss": 0.3432,
      "step": 1054
    },
    {
      "epoch": 0.3895864106351551,
      "grad_norm": 0.2585624158382416,
      "learning_rate": 0.00017413474565833233,
      "loss": 0.2708,
      "step": 1055
    },
    {
      "epoch": 0.38995568685376664,
      "grad_norm": 0.24220868945121765,
      "learning_rate": 0.00017411011208276882,
      "loss": 0.2979,
      "step": 1056
    },
    {
      "epoch": 0.39032496307237813,
      "grad_norm": 0.2443268597126007,
      "learning_rate": 0.00017408547850720533,
      "loss": 0.3052,
      "step": 1057
    },
    {
      "epoch": 0.3906942392909897,
      "grad_norm": 0.2863035202026367,
      "learning_rate": 0.00017406084493164182,
      "loss": 0.3404,
      "step": 1058
    },
    {
      "epoch": 0.39106351550960117,
      "grad_norm": 0.25512877106666565,
      "learning_rate": 0.00017403621135607836,
      "loss": 0.3231,
      "step": 1059
    },
    {
      "epoch": 0.3914327917282127,
      "grad_norm": 0.32268014550209045,
      "learning_rate": 0.00017401157778051485,
      "loss": 0.3773,
      "step": 1060
    },
    {
      "epoch": 0.3918020679468242,
      "grad_norm": 0.4049038290977478,
      "learning_rate": 0.00017398694420495136,
      "loss": 0.3639,
      "step": 1061
    },
    {
      "epoch": 0.39217134416543575,
      "grad_norm": 0.3472083508968353,
      "learning_rate": 0.00017396231062938785,
      "loss": 0.3084,
      "step": 1062
    },
    {
      "epoch": 0.39254062038404725,
      "grad_norm": 0.320273756980896,
      "learning_rate": 0.00017393767705382437,
      "loss": 0.3575,
      "step": 1063
    },
    {
      "epoch": 0.3929098966026588,
      "grad_norm": 0.2545672357082367,
      "learning_rate": 0.00017391304347826088,
      "loss": 0.3381,
      "step": 1064
    },
    {
      "epoch": 0.3932791728212703,
      "grad_norm": 0.31159624457359314,
      "learning_rate": 0.0001738884099026974,
      "loss": 0.3584,
      "step": 1065
    },
    {
      "epoch": 0.39364844903988183,
      "grad_norm": 0.2954980731010437,
      "learning_rate": 0.00017386377632713388,
      "loss": 0.3149,
      "step": 1066
    },
    {
      "epoch": 0.3940177252584934,
      "grad_norm": 0.3100495934486389,
      "learning_rate": 0.0001738391427515704,
      "loss": 0.3778,
      "step": 1067
    },
    {
      "epoch": 0.39438700147710487,
      "grad_norm": 0.2485426664352417,
      "learning_rate": 0.0001738145091760069,
      "loss": 0.2661,
      "step": 1068
    },
    {
      "epoch": 0.3947562776957164,
      "grad_norm": 0.297589510679245,
      "learning_rate": 0.00017378987560044343,
      "loss": 0.3443,
      "step": 1069
    },
    {
      "epoch": 0.3951255539143279,
      "grad_norm": 0.27318239212036133,
      "learning_rate": 0.00017376524202487992,
      "loss": 0.3468,
      "step": 1070
    },
    {
      "epoch": 0.39549483013293946,
      "grad_norm": 0.31159114837646484,
      "learning_rate": 0.00017374060844931643,
      "loss": 0.3596,
      "step": 1071
    },
    {
      "epoch": 0.39586410635155095,
      "grad_norm": 0.32290807366371155,
      "learning_rate": 0.00017371597487375292,
      "loss": 0.4303,
      "step": 1072
    },
    {
      "epoch": 0.3962333825701625,
      "grad_norm": 0.3056161403656006,
      "learning_rate": 0.00017369134129818946,
      "loss": 0.3181,
      "step": 1073
    },
    {
      "epoch": 0.396602658788774,
      "grad_norm": 0.3475019931793213,
      "learning_rate": 0.00017366670772262595,
      "loss": 0.3123,
      "step": 1074
    },
    {
      "epoch": 0.39697193500738553,
      "grad_norm": 0.2533319592475891,
      "learning_rate": 0.00017364207414706246,
      "loss": 0.3335,
      "step": 1075
    },
    {
      "epoch": 0.397341211225997,
      "grad_norm": 0.2805590331554413,
      "learning_rate": 0.00017361744057149895,
      "loss": 0.334,
      "step": 1076
    },
    {
      "epoch": 0.3977104874446086,
      "grad_norm": 0.2707286477088928,
      "learning_rate": 0.00017359280699593546,
      "loss": 0.3292,
      "step": 1077
    },
    {
      "epoch": 0.39807976366322007,
      "grad_norm": 0.26679933071136475,
      "learning_rate": 0.00017356817342037198,
      "loss": 0.334,
      "step": 1078
    },
    {
      "epoch": 0.3984490398818316,
      "grad_norm": 0.2567000389099121,
      "learning_rate": 0.0001735435398448085,
      "loss": 0.3601,
      "step": 1079
    },
    {
      "epoch": 0.3988183161004431,
      "grad_norm": 0.32759955525398254,
      "learning_rate": 0.00017351890626924498,
      "loss": 0.4266,
      "step": 1080
    },
    {
      "epoch": 0.39918759231905465,
      "grad_norm": 0.28385090827941895,
      "learning_rate": 0.0001734942726936815,
      "loss": 0.3725,
      "step": 1081
    },
    {
      "epoch": 0.3995568685376662,
      "grad_norm": 0.23193253576755524,
      "learning_rate": 0.000173469639118118,
      "loss": 0.358,
      "step": 1082
    },
    {
      "epoch": 0.3999261447562777,
      "grad_norm": 0.2786146104335785,
      "learning_rate": 0.00017344500554255452,
      "loss": 0.2927,
      "step": 1083
    },
    {
      "epoch": 0.40029542097488924,
      "grad_norm": 0.2281658947467804,
      "learning_rate": 0.000173420371966991,
      "loss": 0.3019,
      "step": 1084
    },
    {
      "epoch": 0.40066469719350073,
      "grad_norm": 0.2926419973373413,
      "learning_rate": 0.00017339573839142753,
      "loss": 0.2824,
      "step": 1085
    },
    {
      "epoch": 0.4010339734121123,
      "grad_norm": 0.24533440172672272,
      "learning_rate": 0.00017337110481586401,
      "loss": 0.307,
      "step": 1086
    },
    {
      "epoch": 0.40140324963072377,
      "grad_norm": 0.23103263974189758,
      "learning_rate": 0.00017334647124030056,
      "loss": 0.3354,
      "step": 1087
    },
    {
      "epoch": 0.4017725258493353,
      "grad_norm": 0.5301767587661743,
      "learning_rate": 0.00017332183766473704,
      "loss": 0.3756,
      "step": 1088
    },
    {
      "epoch": 0.4021418020679468,
      "grad_norm": 0.4169785976409912,
      "learning_rate": 0.00017329720408917356,
      "loss": 0.2962,
      "step": 1089
    },
    {
      "epoch": 0.40251107828655835,
      "grad_norm": 0.40505361557006836,
      "learning_rate": 0.00017327257051361005,
      "loss": 0.3986,
      "step": 1090
    },
    {
      "epoch": 0.40288035450516985,
      "grad_norm": 0.2466781735420227,
      "learning_rate": 0.00017324793693804656,
      "loss": 0.276,
      "step": 1091
    },
    {
      "epoch": 0.4032496307237814,
      "grad_norm": 0.3305412530899048,
      "learning_rate": 0.00017322330336248307,
      "loss": 0.2625,
      "step": 1092
    },
    {
      "epoch": 0.4036189069423929,
      "grad_norm": 0.28403934836387634,
      "learning_rate": 0.0001731986697869196,
      "loss": 0.3722,
      "step": 1093
    },
    {
      "epoch": 0.40398818316100443,
      "grad_norm": 0.2592989504337311,
      "learning_rate": 0.00017317403621135608,
      "loss": 0.3611,
      "step": 1094
    },
    {
      "epoch": 0.404357459379616,
      "grad_norm": 0.2557947337627411,
      "learning_rate": 0.0001731494026357926,
      "loss": 0.301,
      "step": 1095
    },
    {
      "epoch": 0.40472673559822747,
      "grad_norm": 0.25687193870544434,
      "learning_rate": 0.0001731247690602291,
      "loss": 0.3506,
      "step": 1096
    },
    {
      "epoch": 0.405096011816839,
      "grad_norm": 0.2546410858631134,
      "learning_rate": 0.00017310013548466562,
      "loss": 0.2816,
      "step": 1097
    },
    {
      "epoch": 0.4054652880354505,
      "grad_norm": 0.2537722587585449,
      "learning_rate": 0.0001730755019091021,
      "loss": 0.3187,
      "step": 1098
    },
    {
      "epoch": 0.40583456425406206,
      "grad_norm": 0.27400586009025574,
      "learning_rate": 0.00017305086833353862,
      "loss": 0.3443,
      "step": 1099
    },
    {
      "epoch": 0.40620384047267355,
      "grad_norm": 0.295478880405426,
      "learning_rate": 0.00017302623475797514,
      "loss": 0.3983,
      "step": 1100
    },
    {
      "epoch": 0.40620384047267355,
      "eval_loss": 0.3331240713596344,
      "eval_runtime": 5.8701,
      "eval_samples_per_second": 8.518,
      "eval_steps_per_second": 1.192,
      "step": 1100
    },
    {
      "epoch": 0.4065731166912851,
      "grad_norm": 0.29423660039901733,
      "learning_rate": 0.00017300160118241165,
      "loss": 0.3883,
      "step": 1101
    },
    {
      "epoch": 0.4069423929098966,
      "grad_norm": 0.27080485224723816,
      "learning_rate": 0.00017297696760684814,
      "loss": 0.3107,
      "step": 1102
    },
    {
      "epoch": 0.40731166912850814,
      "grad_norm": 0.35229095816612244,
      "learning_rate": 0.00017295233403128465,
      "loss": 0.4703,
      "step": 1103
    },
    {
      "epoch": 0.4076809453471196,
      "grad_norm": 0.2508137822151184,
      "learning_rate": 0.00017292770045572114,
      "loss": 0.3069,
      "step": 1104
    },
    {
      "epoch": 0.4080502215657312,
      "grad_norm": 0.2992240786552429,
      "learning_rate": 0.00017290306688015768,
      "loss": 0.3626,
      "step": 1105
    },
    {
      "epoch": 0.40841949778434267,
      "grad_norm": 0.2968301475048065,
      "learning_rate": 0.00017287843330459417,
      "loss": 0.342,
      "step": 1106
    },
    {
      "epoch": 0.4087887740029542,
      "grad_norm": 0.2686443030834198,
      "learning_rate": 0.00017285379972903069,
      "loss": 0.3164,
      "step": 1107
    },
    {
      "epoch": 0.4091580502215657,
      "grad_norm": 0.28745171427726746,
      "learning_rate": 0.00017282916615346717,
      "loss": 0.4006,
      "step": 1108
    },
    {
      "epoch": 0.40952732644017725,
      "grad_norm": 0.26906076073646545,
      "learning_rate": 0.0001728045325779037,
      "loss": 0.356,
      "step": 1109
    },
    {
      "epoch": 0.4098966026587888,
      "grad_norm": 0.3046380579471588,
      "learning_rate": 0.0001727798990023402,
      "loss": 0.3334,
      "step": 1110
    },
    {
      "epoch": 0.4102658788774003,
      "grad_norm": 0.3426929712295532,
      "learning_rate": 0.00017275526542677672,
      "loss": 0.4221,
      "step": 1111
    },
    {
      "epoch": 0.41063515509601184,
      "grad_norm": 0.34931817650794983,
      "learning_rate": 0.0001727306318512132,
      "loss": 0.3971,
      "step": 1112
    },
    {
      "epoch": 0.41100443131462333,
      "grad_norm": 0.22070133686065674,
      "learning_rate": 0.00017270599827564972,
      "loss": 0.2544,
      "step": 1113
    },
    {
      "epoch": 0.4113737075332349,
      "grad_norm": 0.29520881175994873,
      "learning_rate": 0.00017268136470008623,
      "loss": 0.3196,
      "step": 1114
    },
    {
      "epoch": 0.41174298375184637,
      "grad_norm": 0.2665400505065918,
      "learning_rate": 0.00017265673112452275,
      "loss": 0.3249,
      "step": 1115
    },
    {
      "epoch": 0.4121122599704579,
      "grad_norm": 0.25689318776130676,
      "learning_rate": 0.00017263209754895924,
      "loss": 0.3203,
      "step": 1116
    },
    {
      "epoch": 0.4124815361890694,
      "grad_norm": 0.31612950563430786,
      "learning_rate": 0.00017260746397339575,
      "loss": 0.3567,
      "step": 1117
    },
    {
      "epoch": 0.41285081240768096,
      "grad_norm": 0.26914507150650024,
      "learning_rate": 0.00017258283039783224,
      "loss": 0.2982,
      "step": 1118
    },
    {
      "epoch": 0.41322008862629245,
      "grad_norm": 0.2826154828071594,
      "learning_rate": 0.00017255819682226878,
      "loss": 0.3348,
      "step": 1119
    },
    {
      "epoch": 0.413589364844904,
      "grad_norm": 0.31187665462493896,
      "learning_rate": 0.00017253356324670527,
      "loss": 0.4929,
      "step": 1120
    },
    {
      "epoch": 0.4139586410635155,
      "grad_norm": 0.3398851454257965,
      "learning_rate": 0.00017250892967114178,
      "loss": 0.3903,
      "step": 1121
    },
    {
      "epoch": 0.41432791728212703,
      "grad_norm": 0.21482737362384796,
      "learning_rate": 0.00017248429609557827,
      "loss": 0.2502,
      "step": 1122
    },
    {
      "epoch": 0.4146971935007385,
      "grad_norm": 0.3095366358757019,
      "learning_rate": 0.00017245966252001478,
      "loss": 0.3447,
      "step": 1123
    },
    {
      "epoch": 0.4150664697193501,
      "grad_norm": 0.41809213161468506,
      "learning_rate": 0.0001724350289444513,
      "loss": 0.3193,
      "step": 1124
    },
    {
      "epoch": 0.4154357459379616,
      "grad_norm": 0.2830677330493927,
      "learning_rate": 0.00017241039536888781,
      "loss": 0.3829,
      "step": 1125
    },
    {
      "epoch": 0.4158050221565731,
      "grad_norm": 0.3771759271621704,
      "learning_rate": 0.0001723857617933243,
      "loss": 0.3566,
      "step": 1126
    },
    {
      "epoch": 0.41617429837518466,
      "grad_norm": 0.2990865707397461,
      "learning_rate": 0.00017236112821776082,
      "loss": 0.3912,
      "step": 1127
    },
    {
      "epoch": 0.41654357459379615,
      "grad_norm": 0.29477930068969727,
      "learning_rate": 0.00017233649464219733,
      "loss": 0.3346,
      "step": 1128
    },
    {
      "epoch": 0.4169128508124077,
      "grad_norm": 0.2764134407043457,
      "learning_rate": 0.00017231186106663385,
      "loss": 0.369,
      "step": 1129
    },
    {
      "epoch": 0.4172821270310192,
      "grad_norm": 0.24675750732421875,
      "learning_rate": 0.00017228722749107033,
      "loss": 0.28,
      "step": 1130
    },
    {
      "epoch": 0.41765140324963074,
      "grad_norm": 0.23767217993736267,
      "learning_rate": 0.00017226259391550685,
      "loss": 0.313,
      "step": 1131
    },
    {
      "epoch": 0.41802067946824223,
      "grad_norm": 0.2794254422187805,
      "learning_rate": 0.00017223796033994336,
      "loss": 0.3902,
      "step": 1132
    },
    {
      "epoch": 0.4183899556868538,
      "grad_norm": 0.27715378999710083,
      "learning_rate": 0.00017221332676437988,
      "loss": 0.3109,
      "step": 1133
    },
    {
      "epoch": 0.41875923190546527,
      "grad_norm": 0.25810864567756653,
      "learning_rate": 0.00017218869318881636,
      "loss": 0.3086,
      "step": 1134
    },
    {
      "epoch": 0.4191285081240768,
      "grad_norm": 0.22305242717266083,
      "learning_rate": 0.00017216405961325288,
      "loss": 0.313,
      "step": 1135
    },
    {
      "epoch": 0.4194977843426883,
      "grad_norm": 0.24376413226127625,
      "learning_rate": 0.00017213942603768937,
      "loss": 0.3192,
      "step": 1136
    },
    {
      "epoch": 0.41986706056129985,
      "grad_norm": 0.2753487825393677,
      "learning_rate": 0.0001721147924621259,
      "loss": 0.297,
      "step": 1137
    },
    {
      "epoch": 0.4202363367799114,
      "grad_norm": 0.24492758512496948,
      "learning_rate": 0.0001720901588865624,
      "loss": 0.2755,
      "step": 1138
    },
    {
      "epoch": 0.4206056129985229,
      "grad_norm": 0.2600553333759308,
      "learning_rate": 0.0001720655253109989,
      "loss": 0.2966,
      "step": 1139
    },
    {
      "epoch": 0.42097488921713444,
      "grad_norm": 0.25787171721458435,
      "learning_rate": 0.0001720408917354354,
      "loss": 0.3345,
      "step": 1140
    },
    {
      "epoch": 0.42134416543574593,
      "grad_norm": 0.26054611802101135,
      "learning_rate": 0.0001720162581598719,
      "loss": 0.3417,
      "step": 1141
    },
    {
      "epoch": 0.4217134416543575,
      "grad_norm": 0.23203696310520172,
      "learning_rate": 0.00017199162458430843,
      "loss": 0.2979,
      "step": 1142
    },
    {
      "epoch": 0.42208271787296897,
      "grad_norm": 0.3051937520503998,
      "learning_rate": 0.00017196699100874494,
      "loss": 0.3893,
      "step": 1143
    },
    {
      "epoch": 0.4224519940915805,
      "grad_norm": 0.32818612456321716,
      "learning_rate": 0.00017194235743318143,
      "loss": 0.4387,
      "step": 1144
    },
    {
      "epoch": 0.422821270310192,
      "grad_norm": 0.33093100786209106,
      "learning_rate": 0.00017191772385761794,
      "loss": 0.4634,
      "step": 1145
    },
    {
      "epoch": 0.42319054652880356,
      "grad_norm": 0.27430132031440735,
      "learning_rate": 0.00017189309028205446,
      "loss": 0.3448,
      "step": 1146
    },
    {
      "epoch": 0.42355982274741505,
      "grad_norm": 0.28466880321502686,
      "learning_rate": 0.00017186845670649097,
      "loss": 0.365,
      "step": 1147
    },
    {
      "epoch": 0.4239290989660266,
      "grad_norm": 0.3052275478839874,
      "learning_rate": 0.00017184382313092746,
      "loss": 0.3404,
      "step": 1148
    },
    {
      "epoch": 0.4242983751846381,
      "grad_norm": 0.2922669053077698,
      "learning_rate": 0.00017181918955536398,
      "loss": 0.307,
      "step": 1149
    },
    {
      "epoch": 0.42466765140324964,
      "grad_norm": 0.3192574083805084,
      "learning_rate": 0.00017179455597980046,
      "loss": 0.3636,
      "step": 1150
    },
    {
      "epoch": 0.42466765140324964,
      "eval_loss": 0.32198551297187805,
      "eval_runtime": 5.8536,
      "eval_samples_per_second": 8.542,
      "eval_steps_per_second": 1.196,
      "step": 1150
    },
    {
      "epoch": 0.4250369276218611,
      "grad_norm": 0.29327887296676636,
      "learning_rate": 0.000171769922404237,
      "loss": 0.3375,
      "step": 1151
    },
    {
      "epoch": 0.4254062038404727,
      "grad_norm": 0.3076665699481964,
      "learning_rate": 0.0001717452888286735,
      "loss": 0.4464,
      "step": 1152
    },
    {
      "epoch": 0.4257754800590842,
      "grad_norm": 0.27381011843681335,
      "learning_rate": 0.00017172065525311,
      "loss": 0.3446,
      "step": 1153
    },
    {
      "epoch": 0.4261447562776957,
      "grad_norm": 0.31370654702186584,
      "learning_rate": 0.0001716960216775465,
      "loss": 0.3587,
      "step": 1154
    },
    {
      "epoch": 0.42651403249630726,
      "grad_norm": 0.31399837136268616,
      "learning_rate": 0.000171671388101983,
      "loss": 0.3302,
      "step": 1155
    },
    {
      "epoch": 0.42688330871491875,
      "grad_norm": 0.26231488585472107,
      "learning_rate": 0.00017164675452641952,
      "loss": 0.3264,
      "step": 1156
    },
    {
      "epoch": 0.4272525849335303,
      "grad_norm": 0.2963448166847229,
      "learning_rate": 0.00017162212095085604,
      "loss": 0.3481,
      "step": 1157
    },
    {
      "epoch": 0.4276218611521418,
      "grad_norm": 0.28689444065093994,
      "learning_rate": 0.00017159748737529253,
      "loss": 0.3688,
      "step": 1158
    },
    {
      "epoch": 0.42799113737075334,
      "grad_norm": 0.3124240040779114,
      "learning_rate": 0.00017157285379972904,
      "loss": 0.3414,
      "step": 1159
    },
    {
      "epoch": 0.42836041358936483,
      "grad_norm": 0.27646341919898987,
      "learning_rate": 0.00017154822022416556,
      "loss": 0.3179,
      "step": 1160
    },
    {
      "epoch": 0.4287296898079764,
      "grad_norm": 0.36102649569511414,
      "learning_rate": 0.00017152358664860207,
      "loss": 0.4195,
      "step": 1161
    },
    {
      "epoch": 0.42909896602658787,
      "grad_norm": 0.28331008553504944,
      "learning_rate": 0.00017149895307303856,
      "loss": 0.3618,
      "step": 1162
    },
    {
      "epoch": 0.4294682422451994,
      "grad_norm": 0.3218463063240051,
      "learning_rate": 0.00017147431949747507,
      "loss": 0.2825,
      "step": 1163
    },
    {
      "epoch": 0.4298375184638109,
      "grad_norm": 0.2545153498649597,
      "learning_rate": 0.0001714496859219116,
      "loss": 0.304,
      "step": 1164
    },
    {
      "epoch": 0.43020679468242246,
      "grad_norm": 0.3132915496826172,
      "learning_rate": 0.00017142505234634807,
      "loss": 0.4584,
      "step": 1165
    },
    {
      "epoch": 0.43057607090103395,
      "grad_norm": 0.27413210272789,
      "learning_rate": 0.0001714004187707846,
      "loss": 0.3159,
      "step": 1166
    },
    {
      "epoch": 0.4309453471196455,
      "grad_norm": 0.27805855870246887,
      "learning_rate": 0.00017137578519522108,
      "loss": 0.44,
      "step": 1167
    },
    {
      "epoch": 0.43131462333825704,
      "grad_norm": 0.34869155287742615,
      "learning_rate": 0.0001713511516196576,
      "loss": 0.3168,
      "step": 1168
    },
    {
      "epoch": 0.43168389955686853,
      "grad_norm": 0.3351599872112274,
      "learning_rate": 0.0001713265180440941,
      "loss": 0.3541,
      "step": 1169
    },
    {
      "epoch": 0.4320531757754801,
      "grad_norm": 0.27900075912475586,
      "learning_rate": 0.00017130188446853062,
      "loss": 0.3222,
      "step": 1170
    },
    {
      "epoch": 0.4324224519940916,
      "grad_norm": 0.2621496021747589,
      "learning_rate": 0.0001712772508929671,
      "loss": 0.3318,
      "step": 1171
    },
    {
      "epoch": 0.4327917282127031,
      "grad_norm": 0.25951269268989563,
      "learning_rate": 0.00017125261731740362,
      "loss": 0.348,
      "step": 1172
    },
    {
      "epoch": 0.4331610044313146,
      "grad_norm": 0.2521866261959076,
      "learning_rate": 0.00017122798374184014,
      "loss": 0.3081,
      "step": 1173
    },
    {
      "epoch": 0.43353028064992616,
      "grad_norm": 0.2677977383136749,
      "learning_rate": 0.00017120335016627665,
      "loss": 0.3118,
      "step": 1174
    },
    {
      "epoch": 0.43389955686853765,
      "grad_norm": 0.2904331684112549,
      "learning_rate": 0.00017117871659071314,
      "loss": 0.2918,
      "step": 1175
    },
    {
      "epoch": 0.4342688330871492,
      "grad_norm": 0.327194482088089,
      "learning_rate": 0.00017115408301514965,
      "loss": 0.3482,
      "step": 1176
    },
    {
      "epoch": 0.4346381093057607,
      "grad_norm": 0.3180336356163025,
      "learning_rate": 0.00017112944943958614,
      "loss": 0.3451,
      "step": 1177
    },
    {
      "epoch": 0.43500738552437224,
      "grad_norm": 0.3503369390964508,
      "learning_rate": 0.00017110481586402268,
      "loss": 0.4447,
      "step": 1178
    },
    {
      "epoch": 0.43537666174298373,
      "grad_norm": 0.24465790390968323,
      "learning_rate": 0.00017108018228845917,
      "loss": 0.3023,
      "step": 1179
    },
    {
      "epoch": 0.4357459379615953,
      "grad_norm": 0.2671799659729004,
      "learning_rate": 0.00017105554871289569,
      "loss": 0.3607,
      "step": 1180
    },
    {
      "epoch": 0.43611521418020677,
      "grad_norm": 0.26522642374038696,
      "learning_rate": 0.00017103091513733217,
      "loss": 0.3386,
      "step": 1181
    },
    {
      "epoch": 0.4364844903988183,
      "grad_norm": 0.2625667452812195,
      "learning_rate": 0.0001710062815617687,
      "loss": 0.3492,
      "step": 1182
    },
    {
      "epoch": 0.43685376661742986,
      "grad_norm": 0.250750869512558,
      "learning_rate": 0.0001709816479862052,
      "loss": 0.2949,
      "step": 1183
    },
    {
      "epoch": 0.43722304283604135,
      "grad_norm": 0.295329213142395,
      "learning_rate": 0.00017095701441064172,
      "loss": 0.3851,
      "step": 1184
    },
    {
      "epoch": 0.4375923190546529,
      "grad_norm": 0.267910361289978,
      "learning_rate": 0.0001709323808350782,
      "loss": 0.3315,
      "step": 1185
    },
    {
      "epoch": 0.4379615952732644,
      "grad_norm": 0.31502625346183777,
      "learning_rate": 0.00017090774725951472,
      "loss": 0.2795,
      "step": 1186
    },
    {
      "epoch": 0.43833087149187594,
      "grad_norm": 0.33922597765922546,
      "learning_rate": 0.00017088311368395123,
      "loss": 0.3036,
      "step": 1187
    },
    {
      "epoch": 0.43870014771048743,
      "grad_norm": 0.27864083647727966,
      "learning_rate": 0.00017085848010838775,
      "loss": 0.3189,
      "step": 1188
    },
    {
      "epoch": 0.439069423929099,
      "grad_norm": 0.3772332966327667,
      "learning_rate": 0.00017083384653282424,
      "loss": 0.3755,
      "step": 1189
    },
    {
      "epoch": 0.43943870014771047,
      "grad_norm": 0.2781737446784973,
      "learning_rate": 0.00017080921295726075,
      "loss": 0.3292,
      "step": 1190
    },
    {
      "epoch": 0.439807976366322,
      "grad_norm": 0.28551939129829407,
      "learning_rate": 0.00017078457938169726,
      "loss": 0.3977,
      "step": 1191
    },
    {
      "epoch": 0.4401772525849335,
      "grad_norm": 0.2906353771686554,
      "learning_rate": 0.00017075994580613378,
      "loss": 0.3369,
      "step": 1192
    },
    {
      "epoch": 0.44054652880354506,
      "grad_norm": 0.29700130224227905,
      "learning_rate": 0.00017073531223057027,
      "loss": 0.3162,
      "step": 1193
    },
    {
      "epoch": 0.44091580502215655,
      "grad_norm": 0.30474820733070374,
      "learning_rate": 0.00017071067865500678,
      "loss": 0.3618,
      "step": 1194
    },
    {
      "epoch": 0.4412850812407681,
      "grad_norm": 0.35836559534072876,
      "learning_rate": 0.00017068604507944327,
      "loss": 0.4259,
      "step": 1195
    },
    {
      "epoch": 0.44165435745937964,
      "grad_norm": 0.2899966239929199,
      "learning_rate": 0.0001706614115038798,
      "loss": 0.3495,
      "step": 1196
    },
    {
      "epoch": 0.44202363367799113,
      "grad_norm": 0.24901117384433746,
      "learning_rate": 0.0001706367779283163,
      "loss": 0.2851,
      "step": 1197
    },
    {
      "epoch": 0.4423929098966027,
      "grad_norm": 0.28132179379463196,
      "learning_rate": 0.0001706121443527528,
      "loss": 0.3194,
      "step": 1198
    },
    {
      "epoch": 0.4427621861152142,
      "grad_norm": 0.28281378746032715,
      "learning_rate": 0.0001705875107771893,
      "loss": 0.336,
      "step": 1199
    },
    {
      "epoch": 0.4431314623338257,
      "grad_norm": 0.26517656445503235,
      "learning_rate": 0.00017056287720162582,
      "loss": 0.3346,
      "step": 1200
    },
    {
      "epoch": 0.4431314623338257,
      "eval_loss": 0.3183690905570984,
      "eval_runtime": 5.856,
      "eval_samples_per_second": 8.538,
      "eval_steps_per_second": 1.195,
      "step": 1200
    },
    {
      "epoch": 0.4435007385524372,
      "grad_norm": 0.30609777569770813,
      "learning_rate": 0.00017053824362606233,
      "loss": 0.375,
      "step": 1201
    },
    {
      "epoch": 0.44387001477104876,
      "grad_norm": 0.2742772698402405,
      "learning_rate": 0.00017051361005049884,
      "loss": 0.2902,
      "step": 1202
    },
    {
      "epoch": 0.44423929098966025,
      "grad_norm": 0.28248992562294006,
      "learning_rate": 0.00017048897647493533,
      "loss": 0.3687,
      "step": 1203
    },
    {
      "epoch": 0.4446085672082718,
      "grad_norm": 0.23091770708560944,
      "learning_rate": 0.00017046434289937185,
      "loss": 0.306,
      "step": 1204
    },
    {
      "epoch": 0.4449778434268833,
      "grad_norm": 0.29506057500839233,
      "learning_rate": 0.00017043970932380836,
      "loss": 0.3557,
      "step": 1205
    },
    {
      "epoch": 0.44534711964549484,
      "grad_norm": 0.36342182755470276,
      "learning_rate": 0.00017041507574824488,
      "loss": 0.4046,
      "step": 1206
    },
    {
      "epoch": 0.44571639586410633,
      "grad_norm": 0.27799978852272034,
      "learning_rate": 0.00017039044217268136,
      "loss": 0.2897,
      "step": 1207
    },
    {
      "epoch": 0.4460856720827179,
      "grad_norm": 0.24548093974590302,
      "learning_rate": 0.00017036580859711788,
      "loss": 0.3065,
      "step": 1208
    },
    {
      "epoch": 0.44645494830132937,
      "grad_norm": 0.2660404443740845,
      "learning_rate": 0.00017034117502155437,
      "loss": 0.3195,
      "step": 1209
    },
    {
      "epoch": 0.4468242245199409,
      "grad_norm": 0.268492192029953,
      "learning_rate": 0.0001703165414459909,
      "loss": 0.3035,
      "step": 1210
    },
    {
      "epoch": 0.44719350073855246,
      "grad_norm": 0.28435540199279785,
      "learning_rate": 0.0001702919078704274,
      "loss": 0.3665,
      "step": 1211
    },
    {
      "epoch": 0.44756277695716395,
      "grad_norm": 0.27771124243736267,
      "learning_rate": 0.0001702672742948639,
      "loss": 0.3397,
      "step": 1212
    },
    {
      "epoch": 0.4479320531757755,
      "grad_norm": 0.28504887223243713,
      "learning_rate": 0.0001702426407193004,
      "loss": 0.2854,
      "step": 1213
    },
    {
      "epoch": 0.448301329394387,
      "grad_norm": 0.24859274923801422,
      "learning_rate": 0.0001702180071437369,
      "loss": 0.2592,
      "step": 1214
    },
    {
      "epoch": 0.44867060561299854,
      "grad_norm": 0.21828439831733704,
      "learning_rate": 0.00017019337356817343,
      "loss": 0.2612,
      "step": 1215
    },
    {
      "epoch": 0.44903988183161003,
      "grad_norm": 0.29936301708221436,
      "learning_rate": 0.00017016873999260994,
      "loss": 0.3574,
      "step": 1216
    },
    {
      "epoch": 0.4494091580502216,
      "grad_norm": 0.30142107605934143,
      "learning_rate": 0.00017014410641704643,
      "loss": 0.3598,
      "step": 1217
    },
    {
      "epoch": 0.44977843426883307,
      "grad_norm": 0.2668401598930359,
      "learning_rate": 0.00017011947284148294,
      "loss": 0.2643,
      "step": 1218
    },
    {
      "epoch": 0.4501477104874446,
      "grad_norm": 0.33149152994155884,
      "learning_rate": 0.00017009483926591946,
      "loss": 0.3488,
      "step": 1219
    },
    {
      "epoch": 0.4505169867060561,
      "grad_norm": 0.26323407888412476,
      "learning_rate": 0.00017007020569035597,
      "loss": 0.2844,
      "step": 1220
    },
    {
      "epoch": 0.45088626292466766,
      "grad_norm": 0.26985248923301697,
      "learning_rate": 0.00017004557211479246,
      "loss": 0.3211,
      "step": 1221
    },
    {
      "epoch": 0.45125553914327915,
      "grad_norm": 0.22127485275268555,
      "learning_rate": 0.00017002093853922897,
      "loss": 0.2934,
      "step": 1222
    },
    {
      "epoch": 0.4516248153618907,
      "grad_norm": 0.2990404963493347,
      "learning_rate": 0.0001699963049636655,
      "loss": 0.3384,
      "step": 1223
    },
    {
      "epoch": 0.4519940915805022,
      "grad_norm": 0.3148226737976074,
      "learning_rate": 0.000169971671388102,
      "loss": 0.3381,
      "step": 1224
    },
    {
      "epoch": 0.45236336779911374,
      "grad_norm": 0.2582751512527466,
      "learning_rate": 0.0001699470378125385,
      "loss": 0.3513,
      "step": 1225
    },
    {
      "epoch": 0.4527326440177253,
      "grad_norm": 0.30039259791374207,
      "learning_rate": 0.000169922404236975,
      "loss": 0.3377,
      "step": 1226
    },
    {
      "epoch": 0.4531019202363368,
      "grad_norm": 0.26333391666412354,
      "learning_rate": 0.0001698977706614115,
      "loss": 0.3394,
      "step": 1227
    },
    {
      "epoch": 0.4534711964549483,
      "grad_norm": 0.29885348677635193,
      "learning_rate": 0.00016987313708584804,
      "loss": 0.4602,
      "step": 1228
    },
    {
      "epoch": 0.4538404726735598,
      "grad_norm": 0.2640257477760315,
      "learning_rate": 0.00016984850351028452,
      "loss": 0.3024,
      "step": 1229
    },
    {
      "epoch": 0.45420974889217136,
      "grad_norm": 0.2874453365802765,
      "learning_rate": 0.00016982386993472104,
      "loss": 0.3861,
      "step": 1230
    },
    {
      "epoch": 0.45457902511078285,
      "grad_norm": 0.3098587691783905,
      "learning_rate": 0.00016979923635915753,
      "loss": 0.3435,
      "step": 1231
    },
    {
      "epoch": 0.4549483013293944,
      "grad_norm": 0.28761473298072815,
      "learning_rate": 0.00016977460278359404,
      "loss": 0.3297,
      "step": 1232
    },
    {
      "epoch": 0.4553175775480059,
      "grad_norm": 0.29999786615371704,
      "learning_rate": 0.00016974996920803055,
      "loss": 0.3161,
      "step": 1233
    },
    {
      "epoch": 0.45568685376661744,
      "grad_norm": 0.28180021047592163,
      "learning_rate": 0.00016972533563246707,
      "loss": 0.3148,
      "step": 1234
    },
    {
      "epoch": 0.45605612998522893,
      "grad_norm": 0.2969822585582733,
      "learning_rate": 0.00016970070205690356,
      "loss": 0.3434,
      "step": 1235
    },
    {
      "epoch": 0.4564254062038405,
      "grad_norm": 0.32349124550819397,
      "learning_rate": 0.00016967606848134007,
      "loss": 0.3623,
      "step": 1236
    },
    {
      "epoch": 0.45679468242245197,
      "grad_norm": 0.24902084469795227,
      "learning_rate": 0.00016965143490577659,
      "loss": 0.2848,
      "step": 1237
    },
    {
      "epoch": 0.4571639586410635,
      "grad_norm": 0.262531042098999,
      "learning_rate": 0.0001696268013302131,
      "loss": 0.2806,
      "step": 1238
    },
    {
      "epoch": 0.45753323485967506,
      "grad_norm": 0.27351832389831543,
      "learning_rate": 0.0001696021677546496,
      "loss": 0.3061,
      "step": 1239
    },
    {
      "epoch": 0.45790251107828656,
      "grad_norm": 0.27112317085266113,
      "learning_rate": 0.0001695775341790861,
      "loss": 0.3001,
      "step": 1240
    },
    {
      "epoch": 0.4582717872968981,
      "grad_norm": 0.4085458219051361,
      "learning_rate": 0.0001695529006035226,
      "loss": 0.3857,
      "step": 1241
    },
    {
      "epoch": 0.4586410635155096,
      "grad_norm": 0.25504735112190247,
      "learning_rate": 0.00016952826702795913,
      "loss": 0.2865,
      "step": 1242
    },
    {
      "epoch": 0.45901033973412114,
      "grad_norm": 0.2736992835998535,
      "learning_rate": 0.00016950363345239562,
      "loss": 0.3633,
      "step": 1243
    },
    {
      "epoch": 0.45937961595273263,
      "grad_norm": 0.2200980931520462,
      "learning_rate": 0.00016947899987683213,
      "loss": 0.2461,
      "step": 1244
    },
    {
      "epoch": 0.4597488921713442,
      "grad_norm": 0.27418121695518494,
      "learning_rate": 0.00016945436630126862,
      "loss": 0.3235,
      "step": 1245
    },
    {
      "epoch": 0.4601181683899557,
      "grad_norm": 0.25174984335899353,
      "learning_rate": 0.00016942973272570514,
      "loss": 0.2813,
      "step": 1246
    },
    {
      "epoch": 0.4604874446085672,
      "grad_norm": 0.23484735190868378,
      "learning_rate": 0.00016940509915014165,
      "loss": 0.2863,
      "step": 1247
    },
    {
      "epoch": 0.4608567208271787,
      "grad_norm": 0.2589341104030609,
      "learning_rate": 0.00016938046557457817,
      "loss": 0.2789,
      "step": 1248
    },
    {
      "epoch": 0.46122599704579026,
      "grad_norm": 0.26269158720970154,
      "learning_rate": 0.00016935583199901465,
      "loss": 0.3234,
      "step": 1249
    },
    {
      "epoch": 0.46159527326440175,
      "grad_norm": 0.2763214707374573,
      "learning_rate": 0.00016933119842345117,
      "loss": 0.3252,
      "step": 1250
    },
    {
      "epoch": 0.46159527326440175,
      "eval_loss": 0.31730917096138,
      "eval_runtime": 5.8523,
      "eval_samples_per_second": 8.544,
      "eval_steps_per_second": 1.196,
      "step": 1250
    },
    {
      "epoch": 0.4619645494830133,
      "grad_norm": 0.35904350876808167,
      "learning_rate": 0.00016930656484788768,
      "loss": 0.3457,
      "step": 1251
    },
    {
      "epoch": 0.4623338257016248,
      "grad_norm": 0.2199958711862564,
      "learning_rate": 0.0001692819312723242,
      "loss": 0.2742,
      "step": 1252
    },
    {
      "epoch": 0.46270310192023634,
      "grad_norm": 0.3560868501663208,
      "learning_rate": 0.00016925729769676068,
      "loss": 0.4143,
      "step": 1253
    },
    {
      "epoch": 0.4630723781388479,
      "grad_norm": 0.2781903147697449,
      "learning_rate": 0.0001692326641211972,
      "loss": 0.316,
      "step": 1254
    },
    {
      "epoch": 0.4634416543574594,
      "grad_norm": 0.2549719512462616,
      "learning_rate": 0.0001692080305456337,
      "loss": 0.2591,
      "step": 1255
    },
    {
      "epoch": 0.4638109305760709,
      "grad_norm": 0.2696338891983032,
      "learning_rate": 0.00016918339697007023,
      "loss": 0.3245,
      "step": 1256
    },
    {
      "epoch": 0.4641802067946824,
      "grad_norm": 0.2692526876926422,
      "learning_rate": 0.00016915876339450672,
      "loss": 0.3394,
      "step": 1257
    },
    {
      "epoch": 0.46454948301329396,
      "grad_norm": 0.2564251720905304,
      "learning_rate": 0.00016913412981894323,
      "loss": 0.2974,
      "step": 1258
    },
    {
      "epoch": 0.46491875923190545,
      "grad_norm": 0.29035502672195435,
      "learning_rate": 0.00016910949624337972,
      "loss": 0.3674,
      "step": 1259
    },
    {
      "epoch": 0.465288035450517,
      "grad_norm": 0.34713611006736755,
      "learning_rate": 0.00016908486266781623,
      "loss": 0.4094,
      "step": 1260
    },
    {
      "epoch": 0.4656573116691285,
      "grad_norm": 0.302212119102478,
      "learning_rate": 0.00016906022909225275,
      "loss": 0.2891,
      "step": 1261
    },
    {
      "epoch": 0.46602658788774004,
      "grad_norm": 0.27764269709587097,
      "learning_rate": 0.00016903559551668926,
      "loss": 0.3353,
      "step": 1262
    },
    {
      "epoch": 0.46639586410635153,
      "grad_norm": 0.32426977157592773,
      "learning_rate": 0.00016901096194112575,
      "loss": 0.4366,
      "step": 1263
    },
    {
      "epoch": 0.4667651403249631,
      "grad_norm": 0.2631196081638336,
      "learning_rate": 0.00016898632836556226,
      "loss": 0.4091,
      "step": 1264
    },
    {
      "epoch": 0.46713441654357457,
      "grad_norm": 0.29156410694122314,
      "learning_rate": 0.00016896169478999878,
      "loss": 0.3326,
      "step": 1265
    },
    {
      "epoch": 0.4675036927621861,
      "grad_norm": 0.2738753855228424,
      "learning_rate": 0.0001689370612144353,
      "loss": 0.2922,
      "step": 1266
    },
    {
      "epoch": 0.4678729689807976,
      "grad_norm": 0.24467527866363525,
      "learning_rate": 0.00016891242763887178,
      "loss": 0.2921,
      "step": 1267
    },
    {
      "epoch": 0.46824224519940916,
      "grad_norm": 0.28518614172935486,
      "learning_rate": 0.0001688877940633083,
      "loss": 0.3344,
      "step": 1268
    },
    {
      "epoch": 0.4686115214180207,
      "grad_norm": 0.2887619435787201,
      "learning_rate": 0.0001688631604877448,
      "loss": 0.3639,
      "step": 1269
    },
    {
      "epoch": 0.4689807976366322,
      "grad_norm": 0.2452942281961441,
      "learning_rate": 0.00016883852691218133,
      "loss": 0.2884,
      "step": 1270
    },
    {
      "epoch": 0.46935007385524374,
      "grad_norm": 0.305033415555954,
      "learning_rate": 0.0001688138933366178,
      "loss": 0.3835,
      "step": 1271
    },
    {
      "epoch": 0.46971935007385524,
      "grad_norm": 0.26679572463035583,
      "learning_rate": 0.00016878925976105433,
      "loss": 0.3163,
      "step": 1272
    },
    {
      "epoch": 0.4700886262924668,
      "grad_norm": 0.2737630307674408,
      "learning_rate": 0.00016876462618549081,
      "loss": 0.3086,
      "step": 1273
    },
    {
      "epoch": 0.4704579025110783,
      "grad_norm": 0.24957697093486786,
      "learning_rate": 0.00016873999260992736,
      "loss": 0.3688,
      "step": 1274
    },
    {
      "epoch": 0.4708271787296898,
      "grad_norm": 0.30872097611427307,
      "learning_rate": 0.00016871535903436384,
      "loss": 0.3559,
      "step": 1275
    },
    {
      "epoch": 0.4711964549483013,
      "grad_norm": 0.31919223070144653,
      "learning_rate": 0.00016869072545880036,
      "loss": 0.4033,
      "step": 1276
    },
    {
      "epoch": 0.47156573116691286,
      "grad_norm": 0.2940533459186554,
      "learning_rate": 0.00016866609188323685,
      "loss": 0.3477,
      "step": 1277
    },
    {
      "epoch": 0.47193500738552435,
      "grad_norm": 0.27033281326293945,
      "learning_rate": 0.00016864145830767336,
      "loss": 0.3523,
      "step": 1278
    },
    {
      "epoch": 0.4723042836041359,
      "grad_norm": 0.2709430158138275,
      "learning_rate": 0.00016861682473210988,
      "loss": 0.299,
      "step": 1279
    },
    {
      "epoch": 0.4726735598227474,
      "grad_norm": 0.2631966173648834,
      "learning_rate": 0.0001685921911565464,
      "loss": 0.3367,
      "step": 1280
    },
    {
      "epoch": 0.47304283604135894,
      "grad_norm": 0.3195972740650177,
      "learning_rate": 0.00016856755758098288,
      "loss": 0.3934,
      "step": 1281
    },
    {
      "epoch": 0.4734121122599705,
      "grad_norm": 0.26415082812309265,
      "learning_rate": 0.0001685429240054194,
      "loss": 0.3251,
      "step": 1282
    },
    {
      "epoch": 0.473781388478582,
      "grad_norm": 0.27375754714012146,
      "learning_rate": 0.0001685182904298559,
      "loss": 0.3805,
      "step": 1283
    },
    {
      "epoch": 0.4741506646971935,
      "grad_norm": 0.2656939625740051,
      "learning_rate": 0.00016849365685429242,
      "loss": 0.3021,
      "step": 1284
    },
    {
      "epoch": 0.474519940915805,
      "grad_norm": 0.2884897291660309,
      "learning_rate": 0.0001684690232787289,
      "loss": 0.343,
      "step": 1285
    },
    {
      "epoch": 0.47488921713441656,
      "grad_norm": 0.26789557933807373,
      "learning_rate": 0.00016844438970316542,
      "loss": 0.3184,
      "step": 1286
    },
    {
      "epoch": 0.47525849335302806,
      "grad_norm": 0.3235875964164734,
      "learning_rate": 0.0001684197561276019,
      "loss": 0.3812,
      "step": 1287
    },
    {
      "epoch": 0.4756277695716396,
      "grad_norm": 0.31186652183532715,
      "learning_rate": 0.00016839512255203845,
      "loss": 0.3684,
      "step": 1288
    },
    {
      "epoch": 0.4759970457902511,
      "grad_norm": 0.29642534255981445,
      "learning_rate": 0.00016837048897647494,
      "loss": 0.3309,
      "step": 1289
    },
    {
      "epoch": 0.47636632200886264,
      "grad_norm": 0.27301859855651855,
      "learning_rate": 0.00016834585540091146,
      "loss": 0.3301,
      "step": 1290
    },
    {
      "epoch": 0.47673559822747413,
      "grad_norm": 0.36492156982421875,
      "learning_rate": 0.00016832122182534794,
      "loss": 0.3854,
      "step": 1291
    },
    {
      "epoch": 0.4771048744460857,
      "grad_norm": 0.36236897110939026,
      "learning_rate": 0.00016829658824978446,
      "loss": 0.4151,
      "step": 1292
    },
    {
      "epoch": 0.4774741506646972,
      "grad_norm": 0.3140570819377899,
      "learning_rate": 0.00016827195467422097,
      "loss": 0.3772,
      "step": 1293
    },
    {
      "epoch": 0.4778434268833087,
      "grad_norm": 0.2550193667411804,
      "learning_rate": 0.0001682473210986575,
      "loss": 0.2802,
      "step": 1294
    },
    {
      "epoch": 0.4782127031019202,
      "grad_norm": 0.233677476644516,
      "learning_rate": 0.00016822268752309397,
      "loss": 0.2433,
      "step": 1295
    },
    {
      "epoch": 0.47858197932053176,
      "grad_norm": 0.2635647654533386,
      "learning_rate": 0.0001681980539475305,
      "loss": 0.2876,
      "step": 1296
    },
    {
      "epoch": 0.4789512555391433,
      "grad_norm": 0.2737426459789276,
      "learning_rate": 0.000168173420371967,
      "loss": 0.3753,
      "step": 1297
    },
    {
      "epoch": 0.4793205317577548,
      "grad_norm": 0.2624737322330475,
      "learning_rate": 0.00016814878679640352,
      "loss": 0.3745,
      "step": 1298
    },
    {
      "epoch": 0.47968980797636634,
      "grad_norm": 0.26850634813308716,
      "learning_rate": 0.00016812415322084,
      "loss": 0.3085,
      "step": 1299
    },
    {
      "epoch": 0.48005908419497784,
      "grad_norm": 0.3231109082698822,
      "learning_rate": 0.00016809951964527652,
      "loss": 0.3975,
      "step": 1300
    },
    {
      "epoch": 0.48005908419497784,
      "eval_loss": 0.31863901019096375,
      "eval_runtime": 5.8691,
      "eval_samples_per_second": 8.519,
      "eval_steps_per_second": 1.193,
      "step": 1300
    },
    {
      "epoch": 0.4804283604135894,
      "grad_norm": 0.27670300006866455,
      "learning_rate": 0.00016807488606971303,
      "loss": 0.3309,
      "step": 1301
    },
    {
      "epoch": 0.4807976366322009,
      "grad_norm": 0.32811930775642395,
      "learning_rate": 0.00016805025249414955,
      "loss": 0.3523,
      "step": 1302
    },
    {
      "epoch": 0.4811669128508124,
      "grad_norm": 0.2851490378379822,
      "learning_rate": 0.00016802561891858604,
      "loss": 0.3168,
      "step": 1303
    },
    {
      "epoch": 0.4815361890694239,
      "grad_norm": 0.3192996680736542,
      "learning_rate": 0.00016800098534302255,
      "loss": 0.2723,
      "step": 1304
    },
    {
      "epoch": 0.48190546528803546,
      "grad_norm": 0.24803690612316132,
      "learning_rate": 0.00016797635176745904,
      "loss": 0.3068,
      "step": 1305
    },
    {
      "epoch": 0.48227474150664695,
      "grad_norm": 0.2650405466556549,
      "learning_rate": 0.00016795171819189558,
      "loss": 0.242,
      "step": 1306
    },
    {
      "epoch": 0.4826440177252585,
      "grad_norm": 0.2497330904006958,
      "learning_rate": 0.00016792708461633207,
      "loss": 0.2766,
      "step": 1307
    },
    {
      "epoch": 0.48301329394387,
      "grad_norm": 0.2995474934577942,
      "learning_rate": 0.00016790245104076858,
      "loss": 0.3323,
      "step": 1308
    },
    {
      "epoch": 0.48338257016248154,
      "grad_norm": 0.25276845693588257,
      "learning_rate": 0.00016787781746520507,
      "loss": 0.2761,
      "step": 1309
    },
    {
      "epoch": 0.48375184638109303,
      "grad_norm": 0.3646930158138275,
      "learning_rate": 0.00016785318388964159,
      "loss": 0.3951,
      "step": 1310
    },
    {
      "epoch": 0.4841211225997046,
      "grad_norm": 0.27568763494491577,
      "learning_rate": 0.0001678285503140781,
      "loss": 0.3092,
      "step": 1311
    },
    {
      "epoch": 0.4844903988183161,
      "grad_norm": 0.327421635389328,
      "learning_rate": 0.00016780391673851461,
      "loss": 0.3603,
      "step": 1312
    },
    {
      "epoch": 0.4848596750369276,
      "grad_norm": 0.3478201925754547,
      "learning_rate": 0.0001677792831629511,
      "loss": 0.3946,
      "step": 1313
    },
    {
      "epoch": 0.48522895125553916,
      "grad_norm": 0.35054340958595276,
      "learning_rate": 0.00016775464958738762,
      "loss": 0.3769,
      "step": 1314
    },
    {
      "epoch": 0.48559822747415066,
      "grad_norm": 0.2698875665664673,
      "learning_rate": 0.00016773001601182413,
      "loss": 0.3266,
      "step": 1315
    },
    {
      "epoch": 0.4859675036927622,
      "grad_norm": 0.27946022152900696,
      "learning_rate": 0.00016770538243626065,
      "loss": 0.2999,
      "step": 1316
    },
    {
      "epoch": 0.4863367799113737,
      "grad_norm": 0.2864866256713867,
      "learning_rate": 0.00016768074886069713,
      "loss": 0.3329,
      "step": 1317
    },
    {
      "epoch": 0.48670605612998524,
      "grad_norm": 0.269000768661499,
      "learning_rate": 0.00016765611528513365,
      "loss": 0.2967,
      "step": 1318
    },
    {
      "epoch": 0.48707533234859673,
      "grad_norm": 0.3208789527416229,
      "learning_rate": 0.00016763148170957014,
      "loss": 0.3269,
      "step": 1319
    },
    {
      "epoch": 0.4874446085672083,
      "grad_norm": 0.2848138213157654,
      "learning_rate": 0.00016760684813400668,
      "loss": 0.3035,
      "step": 1320
    },
    {
      "epoch": 0.4878138847858198,
      "grad_norm": 0.268852561712265,
      "learning_rate": 0.00016758221455844317,
      "loss": 0.3338,
      "step": 1321
    },
    {
      "epoch": 0.4881831610044313,
      "grad_norm": 0.2637290954589844,
      "learning_rate": 0.00016755758098287968,
      "loss": 0.2747,
      "step": 1322
    },
    {
      "epoch": 0.4885524372230428,
      "grad_norm": 0.2860165238380432,
      "learning_rate": 0.00016753294740731617,
      "loss": 0.2727,
      "step": 1323
    },
    {
      "epoch": 0.48892171344165436,
      "grad_norm": 0.3032236695289612,
      "learning_rate": 0.00016750831383175268,
      "loss": 0.3854,
      "step": 1324
    },
    {
      "epoch": 0.48929098966026585,
      "grad_norm": 0.27861616015434265,
      "learning_rate": 0.0001674836802561892,
      "loss": 0.33,
      "step": 1325
    },
    {
      "epoch": 0.4896602658788774,
      "grad_norm": 0.339982271194458,
      "learning_rate": 0.0001674590466806257,
      "loss": 0.341,
      "step": 1326
    },
    {
      "epoch": 0.49002954209748895,
      "grad_norm": 0.21589699387550354,
      "learning_rate": 0.0001674344131050622,
      "loss": 0.287,
      "step": 1327
    },
    {
      "epoch": 0.49039881831610044,
      "grad_norm": 0.2754301428794861,
      "learning_rate": 0.0001674097795294987,
      "loss": 0.3341,
      "step": 1328
    },
    {
      "epoch": 0.490768094534712,
      "grad_norm": 0.2732886075973511,
      "learning_rate": 0.00016738514595393523,
      "loss": 0.3055,
      "step": 1329
    },
    {
      "epoch": 0.4911373707533235,
      "grad_norm": 0.27740296721458435,
      "learning_rate": 0.00016736051237837174,
      "loss": 0.3335,
      "step": 1330
    },
    {
      "epoch": 0.491506646971935,
      "grad_norm": 0.29700103402137756,
      "learning_rate": 0.00016733587880280823,
      "loss": 0.3403,
      "step": 1331
    },
    {
      "epoch": 0.4918759231905465,
      "grad_norm": 0.23043584823608398,
      "learning_rate": 0.00016731124522724474,
      "loss": 0.2416,
      "step": 1332
    },
    {
      "epoch": 0.49224519940915806,
      "grad_norm": 0.25385019183158875,
      "learning_rate": 0.00016728661165168126,
      "loss": 0.2833,
      "step": 1333
    },
    {
      "epoch": 0.49261447562776955,
      "grad_norm": 0.23877032101154327,
      "learning_rate": 0.00016726197807611777,
      "loss": 0.3121,
      "step": 1334
    },
    {
      "epoch": 0.4929837518463811,
      "grad_norm": 0.34636810421943665,
      "learning_rate": 0.00016723734450055426,
      "loss": 0.3682,
      "step": 1335
    },
    {
      "epoch": 0.4933530280649926,
      "grad_norm": 0.27772367000579834,
      "learning_rate": 0.00016721271092499078,
      "loss": 0.3362,
      "step": 1336
    },
    {
      "epoch": 0.49372230428360414,
      "grad_norm": 0.26868563890457153,
      "learning_rate": 0.00016718807734942726,
      "loss": 0.3536,
      "step": 1337
    },
    {
      "epoch": 0.49409158050221563,
      "grad_norm": 0.23275655508041382,
      "learning_rate": 0.0001671634437738638,
      "loss": 0.2636,
      "step": 1338
    },
    {
      "epoch": 0.4944608567208272,
      "grad_norm": 0.2450389862060547,
      "learning_rate": 0.0001671388101983003,
      "loss": 0.3104,
      "step": 1339
    },
    {
      "epoch": 0.4948301329394387,
      "grad_norm": 0.2956058979034424,
      "learning_rate": 0.0001671141766227368,
      "loss": 0.3747,
      "step": 1340
    },
    {
      "epoch": 0.4951994091580502,
      "grad_norm": 0.3192928433418274,
      "learning_rate": 0.0001670895430471733,
      "loss": 0.3266,
      "step": 1341
    },
    {
      "epoch": 0.49556868537666177,
      "grad_norm": 0.2584354281425476,
      "learning_rate": 0.0001670649094716098,
      "loss": 0.3121,
      "step": 1342
    },
    {
      "epoch": 0.49593796159527326,
      "grad_norm": 0.3440920114517212,
      "learning_rate": 0.00016704027589604632,
      "loss": 0.3531,
      "step": 1343
    },
    {
      "epoch": 0.4963072378138848,
      "grad_norm": 0.283672958612442,
      "learning_rate": 0.00016701564232048284,
      "loss": 0.3271,
      "step": 1344
    },
    {
      "epoch": 0.4966765140324963,
      "grad_norm": 0.27528640627861023,
      "learning_rate": 0.00016699100874491933,
      "loss": 0.371,
      "step": 1345
    },
    {
      "epoch": 0.49704579025110784,
      "grad_norm": 0.3449751138687134,
      "learning_rate": 0.00016696637516935584,
      "loss": 0.3208,
      "step": 1346
    },
    {
      "epoch": 0.49741506646971934,
      "grad_norm": 0.2928100526332855,
      "learning_rate": 0.00016694174159379236,
      "loss": 0.3253,
      "step": 1347
    },
    {
      "epoch": 0.4977843426883309,
      "grad_norm": 0.27168670296669006,
      "learning_rate": 0.00016691710801822887,
      "loss": 0.3631,
      "step": 1348
    },
    {
      "epoch": 0.4981536189069424,
      "grad_norm": 0.2860596179962158,
      "learning_rate": 0.00016689247444266536,
      "loss": 0.3608,
      "step": 1349
    },
    {
      "epoch": 0.4985228951255539,
      "grad_norm": 0.2555064857006073,
      "learning_rate": 0.00016686784086710187,
      "loss": 0.2899,
      "step": 1350
    },
    {
      "epoch": 0.4985228951255539,
      "eval_loss": 0.32421794533729553,
      "eval_runtime": 6.0917,
      "eval_samples_per_second": 8.208,
      "eval_steps_per_second": 1.149,
      "step": 1350
    },
    {
      "epoch": 0.4988921713441654,
      "grad_norm": 0.26284271478652954,
      "learning_rate": 0.00016684320729153836,
      "loss": 0.2863,
      "step": 1351
    },
    {
      "epoch": 0.49926144756277696,
      "grad_norm": 0.2949911653995514,
      "learning_rate": 0.0001668185737159749,
      "loss": 0.3462,
      "step": 1352
    },
    {
      "epoch": 0.49963072378138845,
      "grad_norm": 0.24794632196426392,
      "learning_rate": 0.0001667939401404114,
      "loss": 0.2472,
      "step": 1353
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.23740360140800476,
      "learning_rate": 0.0001667693065648479,
      "loss": 0.2951,
      "step": 1354
    },
    {
      "epoch": 0.5003692762186115,
      "grad_norm": 0.3350389897823334,
      "learning_rate": 0.0001667446729892844,
      "loss": 0.391,
      "step": 1355
    },
    {
      "epoch": 0.5007385524372231,
      "grad_norm": 0.24600738286972046,
      "learning_rate": 0.0001667200394137209,
      "loss": 0.2722,
      "step": 1356
    },
    {
      "epoch": 0.5011078286558346,
      "grad_norm": 0.3570854961872101,
      "learning_rate": 0.00016669540583815742,
      "loss": 0.4061,
      "step": 1357
    },
    {
      "epoch": 0.5014771048744461,
      "grad_norm": 0.2860664427280426,
      "learning_rate": 0.00016667077226259394,
      "loss": 0.3423,
      "step": 1358
    },
    {
      "epoch": 0.5018463810930576,
      "grad_norm": 0.27321067452430725,
      "learning_rate": 0.00016664613868703042,
      "loss": 0.3348,
      "step": 1359
    },
    {
      "epoch": 0.5022156573116692,
      "grad_norm": 0.31462761759757996,
      "learning_rate": 0.00016662150511146694,
      "loss": 0.3204,
      "step": 1360
    },
    {
      "epoch": 0.5025849335302807,
      "grad_norm": 0.3128102421760559,
      "learning_rate": 0.00016659687153590345,
      "loss": 0.3525,
      "step": 1361
    },
    {
      "epoch": 0.5029542097488922,
      "grad_norm": 0.3167819380760193,
      "learning_rate": 0.00016657223796033997,
      "loss": 0.3246,
      "step": 1362
    },
    {
      "epoch": 0.5033234859675036,
      "grad_norm": 0.25416892766952515,
      "learning_rate": 0.00016654760438477645,
      "loss": 0.2934,
      "step": 1363
    },
    {
      "epoch": 0.5036927621861153,
      "grad_norm": 0.32133153080940247,
      "learning_rate": 0.00016652297080921297,
      "loss": 0.3408,
      "step": 1364
    },
    {
      "epoch": 0.5040620384047267,
      "grad_norm": 0.29662302136421204,
      "learning_rate": 0.00016649833723364948,
      "loss": 0.3164,
      "step": 1365
    },
    {
      "epoch": 0.5044313146233382,
      "grad_norm": 0.2954084277153015,
      "learning_rate": 0.000166473703658086,
      "loss": 0.364,
      "step": 1366
    },
    {
      "epoch": 0.5048005908419497,
      "grad_norm": 0.3069119453430176,
      "learning_rate": 0.00016644907008252249,
      "loss": 0.3354,
      "step": 1367
    },
    {
      "epoch": 0.5051698670605613,
      "grad_norm": 0.28633591532707214,
      "learning_rate": 0.000166424436506959,
      "loss": 0.3643,
      "step": 1368
    },
    {
      "epoch": 0.5055391432791728,
      "grad_norm": 0.30108842253685,
      "learning_rate": 0.0001663998029313955,
      "loss": 0.3745,
      "step": 1369
    },
    {
      "epoch": 0.5059084194977843,
      "grad_norm": 0.2561410963535309,
      "learning_rate": 0.00016637516935583203,
      "loss": 0.2836,
      "step": 1370
    },
    {
      "epoch": 0.5062776957163959,
      "grad_norm": 0.3111201226711273,
      "learning_rate": 0.00016635053578026852,
      "loss": 0.3228,
      "step": 1371
    },
    {
      "epoch": 0.5066469719350074,
      "grad_norm": 0.41428470611572266,
      "learning_rate": 0.00016632590220470503,
      "loss": 0.3925,
      "step": 1372
    },
    {
      "epoch": 0.5070162481536189,
      "grad_norm": 0.28226950764656067,
      "learning_rate": 0.00016630126862914152,
      "loss": 0.3106,
      "step": 1373
    },
    {
      "epoch": 0.5073855243722304,
      "grad_norm": 0.25619128346443176,
      "learning_rate": 0.00016627663505357803,
      "loss": 0.2821,
      "step": 1374
    },
    {
      "epoch": 0.507754800590842,
      "grad_norm": 0.27361437678337097,
      "learning_rate": 0.00016625200147801455,
      "loss": 0.3405,
      "step": 1375
    },
    {
      "epoch": 0.5081240768094535,
      "grad_norm": 0.2697855830192566,
      "learning_rate": 0.00016622736790245106,
      "loss": 0.3282,
      "step": 1376
    },
    {
      "epoch": 0.508493353028065,
      "grad_norm": 0.3352266550064087,
      "learning_rate": 0.00016620273432688755,
      "loss": 0.3868,
      "step": 1377
    },
    {
      "epoch": 0.5088626292466765,
      "grad_norm": 0.27453646063804626,
      "learning_rate": 0.00016617810075132407,
      "loss": 0.2805,
      "step": 1378
    },
    {
      "epoch": 0.5092319054652881,
      "grad_norm": 0.2632004916667938,
      "learning_rate": 0.00016615346717576058,
      "loss": 0.3112,
      "step": 1379
    },
    {
      "epoch": 0.5096011816838996,
      "grad_norm": 0.2820645272731781,
      "learning_rate": 0.0001661288336001971,
      "loss": 0.3243,
      "step": 1380
    },
    {
      "epoch": 0.509970457902511,
      "grad_norm": 0.2841394543647766,
      "learning_rate": 0.00016610420002463358,
      "loss": 0.356,
      "step": 1381
    },
    {
      "epoch": 0.5103397341211225,
      "grad_norm": 0.24863271415233612,
      "learning_rate": 0.0001660795664490701,
      "loss": 0.2861,
      "step": 1382
    },
    {
      "epoch": 0.5107090103397341,
      "grad_norm": 0.33343276381492615,
      "learning_rate": 0.00016605493287350658,
      "loss": 0.3359,
      "step": 1383
    },
    {
      "epoch": 0.5110782865583456,
      "grad_norm": 0.3108684718608856,
      "learning_rate": 0.00016603029929794313,
      "loss": 0.383,
      "step": 1384
    },
    {
      "epoch": 0.5114475627769571,
      "grad_norm": 0.2670239210128784,
      "learning_rate": 0.00016600566572237961,
      "loss": 0.3371,
      "step": 1385
    },
    {
      "epoch": 0.5118168389955687,
      "grad_norm": 0.19630670547485352,
      "learning_rate": 0.00016598103214681613,
      "loss": 0.2472,
      "step": 1386
    },
    {
      "epoch": 0.5121861152141802,
      "grad_norm": 0.28781604766845703,
      "learning_rate": 0.00016595639857125262,
      "loss": 0.3235,
      "step": 1387
    },
    {
      "epoch": 0.5125553914327917,
      "grad_norm": 0.2403174489736557,
      "learning_rate": 0.00016593176499568913,
      "loss": 0.2812,
      "step": 1388
    },
    {
      "epoch": 0.5129246676514032,
      "grad_norm": 0.39566493034362793,
      "learning_rate": 0.00016590713142012565,
      "loss": 0.3435,
      "step": 1389
    },
    {
      "epoch": 0.5132939438700148,
      "grad_norm": 0.26088348031044006,
      "learning_rate": 0.00016588249784456216,
      "loss": 0.3114,
      "step": 1390
    },
    {
      "epoch": 0.5136632200886263,
      "grad_norm": 0.21565835177898407,
      "learning_rate": 0.00016585786426899865,
      "loss": 0.2756,
      "step": 1391
    },
    {
      "epoch": 0.5140324963072378,
      "grad_norm": 0.2134483903646469,
      "learning_rate": 0.00016583323069343516,
      "loss": 0.2684,
      "step": 1392
    },
    {
      "epoch": 0.5144017725258493,
      "grad_norm": 0.30606624484062195,
      "learning_rate": 0.00016580859711787168,
      "loss": 0.4073,
      "step": 1393
    },
    {
      "epoch": 0.5147710487444609,
      "grad_norm": 0.3195679485797882,
      "learning_rate": 0.0001657839635423082,
      "loss": 0.2985,
      "step": 1394
    },
    {
      "epoch": 0.5151403249630724,
      "grad_norm": 0.28844207525253296,
      "learning_rate": 0.00016575932996674468,
      "loss": 0.3097,
      "step": 1395
    },
    {
      "epoch": 0.5155096011816839,
      "grad_norm": 0.2352343499660492,
      "learning_rate": 0.0001657346963911812,
      "loss": 0.242,
      "step": 1396
    },
    {
      "epoch": 0.5158788774002954,
      "grad_norm": 0.2848149240016937,
      "learning_rate": 0.00016571006281561768,
      "loss": 0.3088,
      "step": 1397
    },
    {
      "epoch": 0.516248153618907,
      "grad_norm": 0.2923349440097809,
      "learning_rate": 0.0001656854292400542,
      "loss": 0.3575,
      "step": 1398
    },
    {
      "epoch": 0.5166174298375185,
      "grad_norm": 0.27221575379371643,
      "learning_rate": 0.0001656607956644907,
      "loss": 0.3295,
      "step": 1399
    },
    {
      "epoch": 0.51698670605613,
      "grad_norm": 0.32597747445106506,
      "learning_rate": 0.0001656361620889272,
      "loss": 0.2864,
      "step": 1400
    },
    {
      "epoch": 0.51698670605613,
      "eval_loss": 0.3158749043941498,
      "eval_runtime": 5.8634,
      "eval_samples_per_second": 8.527,
      "eval_steps_per_second": 1.194,
      "step": 1400
    },
    {
      "epoch": 0.5173559822747416,
      "grad_norm": 0.26604223251342773,
      "learning_rate": 0.0001656115285133637,
      "loss": 0.232,
      "step": 1401
    },
    {
      "epoch": 0.517725258493353,
      "grad_norm": 0.2920580506324768,
      "learning_rate": 0.00016558689493780023,
      "loss": 0.3138,
      "step": 1402
    },
    {
      "epoch": 0.5180945347119645,
      "grad_norm": 0.3129686117172241,
      "learning_rate": 0.00016556226136223674,
      "loss": 0.3843,
      "step": 1403
    },
    {
      "epoch": 0.518463810930576,
      "grad_norm": 0.2926866412162781,
      "learning_rate": 0.00016553762778667323,
      "loss": 0.3102,
      "step": 1404
    },
    {
      "epoch": 0.5188330871491876,
      "grad_norm": 0.3401108384132385,
      "learning_rate": 0.00016551299421110974,
      "loss": 0.3358,
      "step": 1405
    },
    {
      "epoch": 0.5192023633677991,
      "grad_norm": 0.3641965985298157,
      "learning_rate": 0.00016548836063554626,
      "loss": 0.3424,
      "step": 1406
    },
    {
      "epoch": 0.5195716395864106,
      "grad_norm": 0.24067524075508118,
      "learning_rate": 0.00016546372705998277,
      "loss": 0.2566,
      "step": 1407
    },
    {
      "epoch": 0.5199409158050221,
      "grad_norm": 0.28999650478363037,
      "learning_rate": 0.00016543909348441926,
      "loss": 0.3272,
      "step": 1408
    },
    {
      "epoch": 0.5203101920236337,
      "grad_norm": 0.25917020440101624,
      "learning_rate": 0.00016541445990885578,
      "loss": 0.2696,
      "step": 1409
    },
    {
      "epoch": 0.5206794682422452,
      "grad_norm": 0.3292158842086792,
      "learning_rate": 0.00016538982633329226,
      "loss": 0.3769,
      "step": 1410
    },
    {
      "epoch": 0.5210487444608567,
      "grad_norm": 0.24697770178318024,
      "learning_rate": 0.0001653651927577288,
      "loss": 0.2903,
      "step": 1411
    },
    {
      "epoch": 0.5214180206794683,
      "grad_norm": 0.35604941844940186,
      "learning_rate": 0.0001653405591821653,
      "loss": 0.3977,
      "step": 1412
    },
    {
      "epoch": 0.5217872968980798,
      "grad_norm": 0.2998948097229004,
      "learning_rate": 0.0001653159256066018,
      "loss": 0.3352,
      "step": 1413
    },
    {
      "epoch": 0.5221565731166913,
      "grad_norm": 0.3183724582195282,
      "learning_rate": 0.0001652912920310383,
      "loss": 0.3569,
      "step": 1414
    },
    {
      "epoch": 0.5225258493353028,
      "grad_norm": 0.36239326000213623,
      "learning_rate": 0.0001652666584554748,
      "loss": 0.412,
      "step": 1415
    },
    {
      "epoch": 0.5228951255539144,
      "grad_norm": 0.28282883763313293,
      "learning_rate": 0.00016524202487991132,
      "loss": 0.3554,
      "step": 1416
    },
    {
      "epoch": 0.5232644017725259,
      "grad_norm": 0.25308147072792053,
      "learning_rate": 0.00016521739130434784,
      "loss": 0.2832,
      "step": 1417
    },
    {
      "epoch": 0.5236336779911374,
      "grad_norm": 0.26561856269836426,
      "learning_rate": 0.00016519275772878433,
      "loss": 0.3491,
      "step": 1418
    },
    {
      "epoch": 0.5240029542097489,
      "grad_norm": 0.3264663517475128,
      "learning_rate": 0.00016516812415322084,
      "loss": 0.3381,
      "step": 1419
    },
    {
      "epoch": 0.5243722304283605,
      "grad_norm": 0.26392993330955505,
      "learning_rate": 0.00016514349057765736,
      "loss": 0.2983,
      "step": 1420
    },
    {
      "epoch": 0.524741506646972,
      "grad_norm": 0.22230780124664307,
      "learning_rate": 0.00016511885700209387,
      "loss": 0.2936,
      "step": 1421
    },
    {
      "epoch": 0.5251107828655834,
      "grad_norm": 0.2518426477909088,
      "learning_rate": 0.00016509422342653036,
      "loss": 0.3409,
      "step": 1422
    },
    {
      "epoch": 0.5254800590841949,
      "grad_norm": 0.28394436836242676,
      "learning_rate": 0.00016506958985096687,
      "loss": 0.2992,
      "step": 1423
    },
    {
      "epoch": 0.5258493353028065,
      "grad_norm": 0.2799946367740631,
      "learning_rate": 0.00016504495627540336,
      "loss": 0.339,
      "step": 1424
    },
    {
      "epoch": 0.526218611521418,
      "grad_norm": 0.24300873279571533,
      "learning_rate": 0.0001650203226998399,
      "loss": 0.2897,
      "step": 1425
    },
    {
      "epoch": 0.5265878877400295,
      "grad_norm": 0.2443646341562271,
      "learning_rate": 0.0001649956891242764,
      "loss": 0.2778,
      "step": 1426
    },
    {
      "epoch": 0.5269571639586411,
      "grad_norm": 0.24227364361286163,
      "learning_rate": 0.0001649710555487129,
      "loss": 0.3106,
      "step": 1427
    },
    {
      "epoch": 0.5273264401772526,
      "grad_norm": 0.24078361690044403,
      "learning_rate": 0.0001649464219731494,
      "loss": 0.3071,
      "step": 1428
    },
    {
      "epoch": 0.5276957163958641,
      "grad_norm": 0.26339876651763916,
      "learning_rate": 0.0001649217883975859,
      "loss": 0.3343,
      "step": 1429
    },
    {
      "epoch": 0.5280649926144756,
      "grad_norm": 0.28139811754226685,
      "learning_rate": 0.00016489715482202242,
      "loss": 0.307,
      "step": 1430
    },
    {
      "epoch": 0.5284342688330872,
      "grad_norm": 0.29128336906433105,
      "learning_rate": 0.00016487252124645894,
      "loss": 0.3207,
      "step": 1431
    },
    {
      "epoch": 0.5288035450516987,
      "grad_norm": 0.3636402189731598,
      "learning_rate": 0.00016484788767089542,
      "loss": 0.3457,
      "step": 1432
    },
    {
      "epoch": 0.5291728212703102,
      "grad_norm": 0.28574737906455994,
      "learning_rate": 0.00016482325409533194,
      "loss": 0.3648,
      "step": 1433
    },
    {
      "epoch": 0.5295420974889217,
      "grad_norm": 0.29462724924087524,
      "learning_rate": 0.00016479862051976845,
      "loss": 0.3001,
      "step": 1434
    },
    {
      "epoch": 0.5299113737075333,
      "grad_norm": 0.3110988438129425,
      "learning_rate": 0.00016477398694420497,
      "loss": 0.426,
      "step": 1435
    },
    {
      "epoch": 0.5302806499261448,
      "grad_norm": 0.2409798949956894,
      "learning_rate": 0.00016474935336864145,
      "loss": 0.2339,
      "step": 1436
    },
    {
      "epoch": 0.5306499261447563,
      "grad_norm": 0.387317031621933,
      "learning_rate": 0.00016472471979307797,
      "loss": 0.3548,
      "step": 1437
    },
    {
      "epoch": 0.5310192023633677,
      "grad_norm": 0.27128246426582336,
      "learning_rate": 0.00016470008621751448,
      "loss": 0.3148,
      "step": 1438
    },
    {
      "epoch": 0.5313884785819794,
      "grad_norm": 0.28824299573898315,
      "learning_rate": 0.000164675452641951,
      "loss": 0.3721,
      "step": 1439
    },
    {
      "epoch": 0.5317577548005908,
      "grad_norm": 0.27748748660087585,
      "learning_rate": 0.00016465081906638749,
      "loss": 0.3408,
      "step": 1440
    },
    {
      "epoch": 0.5321270310192023,
      "grad_norm": 0.2905904948711395,
      "learning_rate": 0.000164626185490824,
      "loss": 0.3243,
      "step": 1441
    },
    {
      "epoch": 0.5324963072378139,
      "grad_norm": 0.24771912395954132,
      "learning_rate": 0.0001646015519152605,
      "loss": 0.3371,
      "step": 1442
    },
    {
      "epoch": 0.5328655834564254,
      "grad_norm": 0.24672196805477142,
      "learning_rate": 0.00016457691833969703,
      "loss": 0.319,
      "step": 1443
    },
    {
      "epoch": 0.5332348596750369,
      "grad_norm": 0.3501986265182495,
      "learning_rate": 0.00016455228476413352,
      "loss": 0.3868,
      "step": 1444
    },
    {
      "epoch": 0.5336041358936484,
      "grad_norm": 0.2565975785255432,
      "learning_rate": 0.00016452765118857003,
      "loss": 0.3418,
      "step": 1445
    },
    {
      "epoch": 0.53397341211226,
      "grad_norm": 0.2441720813512802,
      "learning_rate": 0.00016450301761300652,
      "loss": 0.3622,
      "step": 1446
    },
    {
      "epoch": 0.5343426883308715,
      "grad_norm": 0.23341308534145355,
      "learning_rate": 0.00016447838403744303,
      "loss": 0.2762,
      "step": 1447
    },
    {
      "epoch": 0.534711964549483,
      "grad_norm": 0.2702963054180145,
      "learning_rate": 0.00016445375046187955,
      "loss": 0.3285,
      "step": 1448
    },
    {
      "epoch": 0.5350812407680945,
      "grad_norm": 0.3120301067829132,
      "learning_rate": 0.00016442911688631606,
      "loss": 0.3018,
      "step": 1449
    },
    {
      "epoch": 0.5354505169867061,
      "grad_norm": 0.2426011562347412,
      "learning_rate": 0.00016440448331075255,
      "loss": 0.3151,
      "step": 1450
    },
    {
      "epoch": 0.5354505169867061,
      "eval_loss": 0.3156881034374237,
      "eval_runtime": 5.8514,
      "eval_samples_per_second": 8.545,
      "eval_steps_per_second": 1.196,
      "step": 1450
    },
    {
      "epoch": 0.5358197932053176,
      "grad_norm": 0.24446044862270355,
      "learning_rate": 0.00016437984973518907,
      "loss": 0.3148,
      "step": 1451
    },
    {
      "epoch": 0.5361890694239291,
      "grad_norm": 0.3189534544944763,
      "learning_rate": 0.00016435521615962558,
      "loss": 0.3989,
      "step": 1452
    },
    {
      "epoch": 0.5365583456425406,
      "grad_norm": 0.24779005348682404,
      "learning_rate": 0.0001643305825840621,
      "loss": 0.279,
      "step": 1453
    },
    {
      "epoch": 0.5369276218611522,
      "grad_norm": 0.27577096223831177,
      "learning_rate": 0.00016430594900849858,
      "loss": 0.2801,
      "step": 1454
    },
    {
      "epoch": 0.5372968980797637,
      "grad_norm": 0.23628848791122437,
      "learning_rate": 0.0001642813154329351,
      "loss": 0.2881,
      "step": 1455
    },
    {
      "epoch": 0.5376661742983752,
      "grad_norm": 0.2869516909122467,
      "learning_rate": 0.00016425668185737158,
      "loss": 0.3083,
      "step": 1456
    },
    {
      "epoch": 0.5380354505169868,
      "grad_norm": 0.2787828743457794,
      "learning_rate": 0.00016423204828180813,
      "loss": 0.3451,
      "step": 1457
    },
    {
      "epoch": 0.5384047267355982,
      "grad_norm": 0.2797350585460663,
      "learning_rate": 0.00016420741470624461,
      "loss": 0.3591,
      "step": 1458
    },
    {
      "epoch": 0.5387740029542097,
      "grad_norm": 0.30753234028816223,
      "learning_rate": 0.00016418278113068113,
      "loss": 0.3568,
      "step": 1459
    },
    {
      "epoch": 0.5391432791728212,
      "grad_norm": 0.2536819875240326,
      "learning_rate": 0.00016415814755511762,
      "loss": 0.2977,
      "step": 1460
    },
    {
      "epoch": 0.5395125553914328,
      "grad_norm": 0.2714076638221741,
      "learning_rate": 0.00016413351397955413,
      "loss": 0.3208,
      "step": 1461
    },
    {
      "epoch": 0.5398818316100443,
      "grad_norm": 0.20879343152046204,
      "learning_rate": 0.00016410888040399065,
      "loss": 0.2451,
      "step": 1462
    },
    {
      "epoch": 0.5402511078286558,
      "grad_norm": 0.34940165281295776,
      "learning_rate": 0.00016408424682842716,
      "loss": 0.3927,
      "step": 1463
    },
    {
      "epoch": 0.5406203840472673,
      "grad_norm": 0.25528988242149353,
      "learning_rate": 0.00016405961325286365,
      "loss": 0.2849,
      "step": 1464
    },
    {
      "epoch": 0.5409896602658789,
      "grad_norm": 0.3349994719028473,
      "learning_rate": 0.00016403497967730016,
      "loss": 0.3715,
      "step": 1465
    },
    {
      "epoch": 0.5413589364844904,
      "grad_norm": 0.2674342393875122,
      "learning_rate": 0.00016401034610173668,
      "loss": 0.3148,
      "step": 1466
    },
    {
      "epoch": 0.5417282127031019,
      "grad_norm": 0.2494310438632965,
      "learning_rate": 0.0001639857125261732,
      "loss": 0.2822,
      "step": 1467
    },
    {
      "epoch": 0.5420974889217134,
      "grad_norm": 0.2767691910266876,
      "learning_rate": 0.00016396107895060968,
      "loss": 0.3036,
      "step": 1468
    },
    {
      "epoch": 0.542466765140325,
      "grad_norm": 0.24478130042552948,
      "learning_rate": 0.0001639364453750462,
      "loss": 0.2783,
      "step": 1469
    },
    {
      "epoch": 0.5428360413589365,
      "grad_norm": 0.25796937942504883,
      "learning_rate": 0.0001639118117994827,
      "loss": 0.3331,
      "step": 1470
    },
    {
      "epoch": 0.543205317577548,
      "grad_norm": 0.27148813009262085,
      "learning_rate": 0.00016388717822391922,
      "loss": 0.2906,
      "step": 1471
    },
    {
      "epoch": 0.5435745937961596,
      "grad_norm": 0.27060097455978394,
      "learning_rate": 0.0001638625446483557,
      "loss": 0.2871,
      "step": 1472
    },
    {
      "epoch": 0.5439438700147711,
      "grad_norm": 0.3541237413883209,
      "learning_rate": 0.00016383791107279222,
      "loss": 0.39,
      "step": 1473
    },
    {
      "epoch": 0.5443131462333826,
      "grad_norm": 0.2907795011997223,
      "learning_rate": 0.0001638132774972287,
      "loss": 0.3264,
      "step": 1474
    },
    {
      "epoch": 0.544682422451994,
      "grad_norm": 0.2731079161167145,
      "learning_rate": 0.00016378864392166525,
      "loss": 0.3401,
      "step": 1475
    },
    {
      "epoch": 0.5450516986706057,
      "grad_norm": 0.33572500944137573,
      "learning_rate": 0.00016376401034610174,
      "loss": 0.4631,
      "step": 1476
    },
    {
      "epoch": 0.5454209748892171,
      "grad_norm": 0.23366647958755493,
      "learning_rate": 0.00016373937677053826,
      "loss": 0.2643,
      "step": 1477
    },
    {
      "epoch": 0.5457902511078286,
      "grad_norm": 0.2857065200805664,
      "learning_rate": 0.00016371474319497474,
      "loss": 0.3489,
      "step": 1478
    },
    {
      "epoch": 0.5461595273264401,
      "grad_norm": 0.3102196455001831,
      "learning_rate": 0.00016369010961941126,
      "loss": 0.409,
      "step": 1479
    },
    {
      "epoch": 0.5465288035450517,
      "grad_norm": 0.3188258409500122,
      "learning_rate": 0.00016366547604384777,
      "loss": 0.3686,
      "step": 1480
    },
    {
      "epoch": 0.5468980797636632,
      "grad_norm": 0.3555956482887268,
      "learning_rate": 0.0001636408424682843,
      "loss": 0.3396,
      "step": 1481
    },
    {
      "epoch": 0.5472673559822747,
      "grad_norm": 0.2818538248538971,
      "learning_rate": 0.00016361620889272078,
      "loss": 0.3132,
      "step": 1482
    },
    {
      "epoch": 0.5476366322008862,
      "grad_norm": 0.23820848762989044,
      "learning_rate": 0.0001635915753171573,
      "loss": 0.3029,
      "step": 1483
    },
    {
      "epoch": 0.5480059084194978,
      "grad_norm": 0.3810647428035736,
      "learning_rate": 0.0001635669417415938,
      "loss": 0.3869,
      "step": 1484
    },
    {
      "epoch": 0.5483751846381093,
      "grad_norm": 0.27515873312950134,
      "learning_rate": 0.00016354230816603032,
      "loss": 0.3279,
      "step": 1485
    },
    {
      "epoch": 0.5487444608567208,
      "grad_norm": 0.23117667436599731,
      "learning_rate": 0.0001635176745904668,
      "loss": 0.337,
      "step": 1486
    },
    {
      "epoch": 0.5491137370753324,
      "grad_norm": 0.2633749842643738,
      "learning_rate": 0.00016349304101490332,
      "loss": 0.3625,
      "step": 1487
    },
    {
      "epoch": 0.5494830132939439,
      "grad_norm": 0.30863508582115173,
      "learning_rate": 0.0001634684074393398,
      "loss": 0.3967,
      "step": 1488
    },
    {
      "epoch": 0.5498522895125554,
      "grad_norm": 0.32042405009269714,
      "learning_rate": 0.00016344377386377635,
      "loss": 0.3298,
      "step": 1489
    },
    {
      "epoch": 0.5502215657311669,
      "grad_norm": 0.2894206941127777,
      "learning_rate": 0.00016341914028821284,
      "loss": 0.3117,
      "step": 1490
    },
    {
      "epoch": 0.5505908419497785,
      "grad_norm": 0.32331088185310364,
      "learning_rate": 0.00016339450671264935,
      "loss": 0.378,
      "step": 1491
    },
    {
      "epoch": 0.55096011816839,
      "grad_norm": 0.2450391948223114,
      "learning_rate": 0.00016336987313708584,
      "loss": 0.2865,
      "step": 1492
    },
    {
      "epoch": 0.5513293943870015,
      "grad_norm": 0.2539740800857544,
      "learning_rate": 0.00016334523956152236,
      "loss": 0.2886,
      "step": 1493
    },
    {
      "epoch": 0.551698670605613,
      "grad_norm": 0.29503604769706726,
      "learning_rate": 0.00016332060598595887,
      "loss": 0.335,
      "step": 1494
    },
    {
      "epoch": 0.5520679468242246,
      "grad_norm": 0.359852135181427,
      "learning_rate": 0.00016329597241039538,
      "loss": 0.3423,
      "step": 1495
    },
    {
      "epoch": 0.552437223042836,
      "grad_norm": 0.28774669766426086,
      "learning_rate": 0.00016327133883483187,
      "loss": 0.3651,
      "step": 1496
    },
    {
      "epoch": 0.5528064992614475,
      "grad_norm": 0.35923609137535095,
      "learning_rate": 0.0001632467052592684,
      "loss": 0.3273,
      "step": 1497
    },
    {
      "epoch": 0.553175775480059,
      "grad_norm": 0.23598815500736237,
      "learning_rate": 0.0001632220716837049,
      "loss": 0.2984,
      "step": 1498
    },
    {
      "epoch": 0.5535450516986706,
      "grad_norm": 0.3182399868965149,
      "learning_rate": 0.00016319743810814142,
      "loss": 0.3696,
      "step": 1499
    },
    {
      "epoch": 0.5539143279172821,
      "grad_norm": 0.23834967613220215,
      "learning_rate": 0.0001631728045325779,
      "loss": 0.2899,
      "step": 1500
    },
    {
      "epoch": 0.5539143279172821,
      "eval_loss": 0.3087127208709717,
      "eval_runtime": 5.8621,
      "eval_samples_per_second": 8.529,
      "eval_steps_per_second": 1.194,
      "step": 1500
    },
    {
      "epoch": 0.5542836041358936,
      "grad_norm": 0.2657660245895386,
      "learning_rate": 0.00016314817095701442,
      "loss": 0.3114,
      "step": 1501
    },
    {
      "epoch": 0.5546528803545052,
      "grad_norm": 0.29077285528182983,
      "learning_rate": 0.00016312353738145093,
      "loss": 0.3777,
      "step": 1502
    },
    {
      "epoch": 0.5550221565731167,
      "grad_norm": 0.2574448883533478,
      "learning_rate": 0.00016309890380588745,
      "loss": 0.3219,
      "step": 1503
    },
    {
      "epoch": 0.5553914327917282,
      "grad_norm": 0.26231813430786133,
      "learning_rate": 0.00016307427023032393,
      "loss": 0.3296,
      "step": 1504
    },
    {
      "epoch": 0.5557607090103397,
      "grad_norm": 0.2316693812608719,
      "learning_rate": 0.00016304963665476045,
      "loss": 0.2807,
      "step": 1505
    },
    {
      "epoch": 0.5561299852289513,
      "grad_norm": 0.3146055042743683,
      "learning_rate": 0.00016302500307919694,
      "loss": 0.3211,
      "step": 1506
    },
    {
      "epoch": 0.5564992614475628,
      "grad_norm": 0.2974916994571686,
      "learning_rate": 0.00016300036950363348,
      "loss": 0.3232,
      "step": 1507
    },
    {
      "epoch": 0.5568685376661743,
      "grad_norm": 0.25729867815971375,
      "learning_rate": 0.00016297573592806997,
      "loss": 0.3356,
      "step": 1508
    },
    {
      "epoch": 0.5572378138847858,
      "grad_norm": 0.23712551593780518,
      "learning_rate": 0.00016295110235250648,
      "loss": 0.2992,
      "step": 1509
    },
    {
      "epoch": 0.5576070901033974,
      "grad_norm": 0.3085513114929199,
      "learning_rate": 0.00016292646877694297,
      "loss": 0.4058,
      "step": 1510
    },
    {
      "epoch": 0.5579763663220089,
      "grad_norm": 0.3611229360103607,
      "learning_rate": 0.00016290183520137948,
      "loss": 0.4202,
      "step": 1511
    },
    {
      "epoch": 0.5583456425406204,
      "grad_norm": 0.2877354919910431,
      "learning_rate": 0.000162877201625816,
      "loss": 0.3227,
      "step": 1512
    },
    {
      "epoch": 0.558714918759232,
      "grad_norm": 0.32323339581489563,
      "learning_rate": 0.0001628525680502525,
      "loss": 0.3422,
      "step": 1513
    },
    {
      "epoch": 0.5590841949778435,
      "grad_norm": 0.32171431183815,
      "learning_rate": 0.000162827934474689,
      "loss": 0.3269,
      "step": 1514
    },
    {
      "epoch": 0.5594534711964549,
      "grad_norm": 0.28730499744415283,
      "learning_rate": 0.00016280330089912551,
      "loss": 0.3358,
      "step": 1515
    },
    {
      "epoch": 0.5598227474150664,
      "grad_norm": 0.3502312898635864,
      "learning_rate": 0.00016277866732356203,
      "loss": 0.3287,
      "step": 1516
    },
    {
      "epoch": 0.560192023633678,
      "grad_norm": 0.26959285140037537,
      "learning_rate": 0.00016275403374799854,
      "loss": 0.3193,
      "step": 1517
    },
    {
      "epoch": 0.5605612998522895,
      "grad_norm": 0.29513633251190186,
      "learning_rate": 0.00016272940017243503,
      "loss": 0.3663,
      "step": 1518
    },
    {
      "epoch": 0.560930576070901,
      "grad_norm": 0.3040134012699127,
      "learning_rate": 0.00016270476659687155,
      "loss": 0.3396,
      "step": 1519
    },
    {
      "epoch": 0.5612998522895125,
      "grad_norm": 0.3299552798271179,
      "learning_rate": 0.00016268013302130803,
      "loss": 0.3937,
      "step": 1520
    },
    {
      "epoch": 0.5616691285081241,
      "grad_norm": 0.32670196890830994,
      "learning_rate": 0.00016265549944574458,
      "loss": 0.293,
      "step": 1521
    },
    {
      "epoch": 0.5620384047267356,
      "grad_norm": 0.25746822357177734,
      "learning_rate": 0.00016263086587018106,
      "loss": 0.2973,
      "step": 1522
    },
    {
      "epoch": 0.5624076809453471,
      "grad_norm": 0.2539050281047821,
      "learning_rate": 0.00016260623229461758,
      "loss": 0.3251,
      "step": 1523
    },
    {
      "epoch": 0.5627769571639586,
      "grad_norm": 0.3070831894874573,
      "learning_rate": 0.00016258159871905406,
      "loss": 0.3441,
      "step": 1524
    },
    {
      "epoch": 0.5631462333825702,
      "grad_norm": 0.2641865015029907,
      "learning_rate": 0.00016255696514349058,
      "loss": 0.3218,
      "step": 1525
    },
    {
      "epoch": 0.5635155096011817,
      "grad_norm": 0.31374669075012207,
      "learning_rate": 0.0001625323315679271,
      "loss": 0.4017,
      "step": 1526
    },
    {
      "epoch": 0.5638847858197932,
      "grad_norm": 0.29651129245758057,
      "learning_rate": 0.0001625076979923636,
      "loss": 0.326,
      "step": 1527
    },
    {
      "epoch": 0.5642540620384048,
      "grad_norm": 0.30200543999671936,
      "learning_rate": 0.0001624830644168001,
      "loss": 0.332,
      "step": 1528
    },
    {
      "epoch": 0.5646233382570163,
      "grad_norm": 0.2974286675453186,
      "learning_rate": 0.0001624584308412366,
      "loss": 0.3546,
      "step": 1529
    },
    {
      "epoch": 0.5649926144756278,
      "grad_norm": 0.28115278482437134,
      "learning_rate": 0.00016243379726567313,
      "loss": 0.3282,
      "step": 1530
    },
    {
      "epoch": 0.5653618906942393,
      "grad_norm": 0.30444568395614624,
      "learning_rate": 0.00016240916369010964,
      "loss": 0.3091,
      "step": 1531
    },
    {
      "epoch": 0.5657311669128509,
      "grad_norm": 0.2837795317173004,
      "learning_rate": 0.00016238453011454613,
      "loss": 0.3632,
      "step": 1532
    },
    {
      "epoch": 0.5661004431314623,
      "grad_norm": 0.2722748816013336,
      "learning_rate": 0.00016235989653898264,
      "loss": 0.3298,
      "step": 1533
    },
    {
      "epoch": 0.5664697193500738,
      "grad_norm": 0.26394224166870117,
      "learning_rate": 0.00016233526296341913,
      "loss": 0.3369,
      "step": 1534
    },
    {
      "epoch": 0.5668389955686853,
      "grad_norm": 0.33864474296569824,
      "learning_rate": 0.00016231062938785567,
      "loss": 0.3344,
      "step": 1535
    },
    {
      "epoch": 0.5672082717872969,
      "grad_norm": 0.31261345744132996,
      "learning_rate": 0.00016228599581229216,
      "loss": 0.3695,
      "step": 1536
    },
    {
      "epoch": 0.5675775480059084,
      "grad_norm": 0.27573469281196594,
      "learning_rate": 0.00016226136223672867,
      "loss": 0.2538,
      "step": 1537
    },
    {
      "epoch": 0.5679468242245199,
      "grad_norm": 0.29221418499946594,
      "learning_rate": 0.00016223672866116516,
      "loss": 0.3761,
      "step": 1538
    },
    {
      "epoch": 0.5683161004431314,
      "grad_norm": 0.34038153290748596,
      "learning_rate": 0.0001622120950856017,
      "loss": 0.3817,
      "step": 1539
    },
    {
      "epoch": 0.568685376661743,
      "grad_norm": 0.3297428786754608,
      "learning_rate": 0.0001621874615100382,
      "loss": 0.3304,
      "step": 1540
    },
    {
      "epoch": 0.5690546528803545,
      "grad_norm": 0.3473551273345947,
      "learning_rate": 0.0001621628279344747,
      "loss": 0.3048,
      "step": 1541
    },
    {
      "epoch": 0.569423929098966,
      "grad_norm": 0.32104527950286865,
      "learning_rate": 0.0001621381943589112,
      "loss": 0.3712,
      "step": 1542
    },
    {
      "epoch": 0.5697932053175776,
      "grad_norm": 0.29048553109169006,
      "learning_rate": 0.0001621135607833477,
      "loss": 0.3003,
      "step": 1543
    },
    {
      "epoch": 0.5701624815361891,
      "grad_norm": 0.27641400694847107,
      "learning_rate": 0.00016208892720778422,
      "loss": 0.3013,
      "step": 1544
    },
    {
      "epoch": 0.5705317577548006,
      "grad_norm": 0.2748175263404846,
      "learning_rate": 0.00016206429363222074,
      "loss": 0.3312,
      "step": 1545
    },
    {
      "epoch": 0.5709010339734121,
      "grad_norm": 0.3276108503341675,
      "learning_rate": 0.00016203966005665722,
      "loss": 0.313,
      "step": 1546
    },
    {
      "epoch": 0.5712703101920237,
      "grad_norm": 0.2572093605995178,
      "learning_rate": 0.00016201502648109374,
      "loss": 0.3132,
      "step": 1547
    },
    {
      "epoch": 0.5716395864106352,
      "grad_norm": 0.29883354902267456,
      "learning_rate": 0.00016199039290553025,
      "loss": 0.2743,
      "step": 1548
    },
    {
      "epoch": 0.5720088626292467,
      "grad_norm": 0.24311719834804535,
      "learning_rate": 0.00016196575932996677,
      "loss": 0.2732,
      "step": 1549
    },
    {
      "epoch": 0.5723781388478582,
      "grad_norm": 0.25401267409324646,
      "learning_rate": 0.00016194112575440326,
      "loss": 0.2704,
      "step": 1550
    },
    {
      "epoch": 0.5723781388478582,
      "eval_loss": 0.3062502145767212,
      "eval_runtime": 5.8576,
      "eval_samples_per_second": 8.536,
      "eval_steps_per_second": 1.195,
      "step": 1550
    },
    {
      "epoch": 0.5727474150664698,
      "grad_norm": 0.339450865983963,
      "learning_rate": 0.00016191649217883977,
      "loss": 0.407,
      "step": 1551
    },
    {
      "epoch": 0.5731166912850812,
      "grad_norm": 0.3461816906929016,
      "learning_rate": 0.00016189185860327626,
      "loss": 0.3307,
      "step": 1552
    },
    {
      "epoch": 0.5734859675036927,
      "grad_norm": 0.3485092222690582,
      "learning_rate": 0.0001618672250277128,
      "loss": 0.3595,
      "step": 1553
    },
    {
      "epoch": 0.5738552437223042,
      "grad_norm": 0.29093149304389954,
      "learning_rate": 0.0001618425914521493,
      "loss": 0.3634,
      "step": 1554
    },
    {
      "epoch": 0.5742245199409158,
      "grad_norm": 0.2776423692703247,
      "learning_rate": 0.0001618179578765858,
      "loss": 0.2651,
      "step": 1555
    },
    {
      "epoch": 0.5745937961595273,
      "grad_norm": 0.2952934801578522,
      "learning_rate": 0.0001617933243010223,
      "loss": 0.3159,
      "step": 1556
    },
    {
      "epoch": 0.5749630723781388,
      "grad_norm": 0.27229222655296326,
      "learning_rate": 0.0001617686907254588,
      "loss": 0.2521,
      "step": 1557
    },
    {
      "epoch": 0.5753323485967504,
      "grad_norm": 0.2553476095199585,
      "learning_rate": 0.00016174405714989532,
      "loss": 0.2818,
      "step": 1558
    },
    {
      "epoch": 0.5757016248153619,
      "grad_norm": 0.32476967573165894,
      "learning_rate": 0.00016171942357433183,
      "loss": 0.4236,
      "step": 1559
    },
    {
      "epoch": 0.5760709010339734,
      "grad_norm": 0.2893090546131134,
      "learning_rate": 0.00016169478999876832,
      "loss": 0.3393,
      "step": 1560
    },
    {
      "epoch": 0.5764401772525849,
      "grad_norm": 0.4110506474971771,
      "learning_rate": 0.00016167015642320484,
      "loss": 0.3571,
      "step": 1561
    },
    {
      "epoch": 0.5768094534711965,
      "grad_norm": 0.33851632475852966,
      "learning_rate": 0.00016164552284764135,
      "loss": 0.3922,
      "step": 1562
    },
    {
      "epoch": 0.577178729689808,
      "grad_norm": 0.2789517343044281,
      "learning_rate": 0.00016162088927207786,
      "loss": 0.3559,
      "step": 1563
    },
    {
      "epoch": 0.5775480059084195,
      "grad_norm": 0.29222574830055237,
      "learning_rate": 0.00016159625569651435,
      "loss": 0.3155,
      "step": 1564
    },
    {
      "epoch": 0.577917282127031,
      "grad_norm": 0.2853766083717346,
      "learning_rate": 0.00016157162212095087,
      "loss": 0.3646,
      "step": 1565
    },
    {
      "epoch": 0.5782865583456426,
      "grad_norm": 0.317956805229187,
      "learning_rate": 0.00016154698854538735,
      "loss": 0.3804,
      "step": 1566
    },
    {
      "epoch": 0.5786558345642541,
      "grad_norm": 0.26587975025177,
      "learning_rate": 0.0001615223549698239,
      "loss": 0.357,
      "step": 1567
    },
    {
      "epoch": 0.5790251107828656,
      "grad_norm": 0.3031890392303467,
      "learning_rate": 0.00016149772139426038,
      "loss": 0.3592,
      "step": 1568
    },
    {
      "epoch": 0.579394387001477,
      "grad_norm": 0.2600473463535309,
      "learning_rate": 0.0001614730878186969,
      "loss": 0.2968,
      "step": 1569
    },
    {
      "epoch": 0.5797636632200887,
      "grad_norm": 0.2568517327308655,
      "learning_rate": 0.00016144845424313339,
      "loss": 0.3467,
      "step": 1570
    },
    {
      "epoch": 0.5801329394387001,
      "grad_norm": 0.2554601728916168,
      "learning_rate": 0.0001614238206675699,
      "loss": 0.3469,
      "step": 1571
    },
    {
      "epoch": 0.5805022156573116,
      "grad_norm": 0.31351980566978455,
      "learning_rate": 0.00016139918709200642,
      "loss": 0.3786,
      "step": 1572
    },
    {
      "epoch": 0.5808714918759232,
      "grad_norm": 0.2992032468318939,
      "learning_rate": 0.00016137455351644293,
      "loss": 0.3611,
      "step": 1573
    },
    {
      "epoch": 0.5812407680945347,
      "grad_norm": 0.3725816011428833,
      "learning_rate": 0.00016134991994087942,
      "loss": 0.3144,
      "step": 1574
    },
    {
      "epoch": 0.5816100443131462,
      "grad_norm": 0.31933802366256714,
      "learning_rate": 0.00016132528636531593,
      "loss": 0.4571,
      "step": 1575
    },
    {
      "epoch": 0.5819793205317577,
      "grad_norm": 0.33846473693847656,
      "learning_rate": 0.00016130065278975245,
      "loss": 0.3462,
      "step": 1576
    },
    {
      "epoch": 0.5823485967503693,
      "grad_norm": 0.28070947527885437,
      "learning_rate": 0.00016127601921418896,
      "loss": 0.3154,
      "step": 1577
    },
    {
      "epoch": 0.5827178729689808,
      "grad_norm": 0.2888851463794708,
      "learning_rate": 0.00016125138563862545,
      "loss": 0.4309,
      "step": 1578
    },
    {
      "epoch": 0.5830871491875923,
      "grad_norm": 0.30861085653305054,
      "learning_rate": 0.00016122675206306196,
      "loss": 0.3413,
      "step": 1579
    },
    {
      "epoch": 0.5834564254062038,
      "grad_norm": 0.27672407031059265,
      "learning_rate": 0.00016120211848749848,
      "loss": 0.3191,
      "step": 1580
    },
    {
      "epoch": 0.5838257016248154,
      "grad_norm": 0.2880988121032715,
      "learning_rate": 0.000161177484911935,
      "loss": 0.3164,
      "step": 1581
    },
    {
      "epoch": 0.5841949778434269,
      "grad_norm": 0.263375461101532,
      "learning_rate": 0.00016115285133637148,
      "loss": 0.3351,
      "step": 1582
    },
    {
      "epoch": 0.5845642540620384,
      "grad_norm": 0.2584865987300873,
      "learning_rate": 0.000161128217760808,
      "loss": 0.291,
      "step": 1583
    },
    {
      "epoch": 0.5849335302806499,
      "grad_norm": 0.30513739585876465,
      "learning_rate": 0.00016110358418524448,
      "loss": 0.3543,
      "step": 1584
    },
    {
      "epoch": 0.5853028064992615,
      "grad_norm": 0.2361738681793213,
      "learning_rate": 0.00016107895060968102,
      "loss": 0.2925,
      "step": 1585
    },
    {
      "epoch": 0.585672082717873,
      "grad_norm": 0.2888126075267792,
      "learning_rate": 0.0001610543170341175,
      "loss": 0.3024,
      "step": 1586
    },
    {
      "epoch": 0.5860413589364845,
      "grad_norm": 0.2634701430797577,
      "learning_rate": 0.00016102968345855403,
      "loss": 0.3267,
      "step": 1587
    },
    {
      "epoch": 0.5864106351550961,
      "grad_norm": 0.3056796193122864,
      "learning_rate": 0.00016100504988299051,
      "loss": 0.3574,
      "step": 1588
    },
    {
      "epoch": 0.5867799113737076,
      "grad_norm": 0.29132723808288574,
      "learning_rate": 0.00016098041630742703,
      "loss": 0.2785,
      "step": 1589
    },
    {
      "epoch": 0.587149187592319,
      "grad_norm": 0.24154838919639587,
      "learning_rate": 0.00016095578273186354,
      "loss": 0.3014,
      "step": 1590
    },
    {
      "epoch": 0.5875184638109305,
      "grad_norm": 0.295614093542099,
      "learning_rate": 0.00016093114915630006,
      "loss": 0.3135,
      "step": 1591
    },
    {
      "epoch": 0.5878877400295421,
      "grad_norm": 0.2389475256204605,
      "learning_rate": 0.00016090651558073655,
      "loss": 0.2642,
      "step": 1592
    },
    {
      "epoch": 0.5882570162481536,
      "grad_norm": 0.26387137174606323,
      "learning_rate": 0.00016088188200517306,
      "loss": 0.3154,
      "step": 1593
    },
    {
      "epoch": 0.5886262924667651,
      "grad_norm": 0.3425314128398895,
      "learning_rate": 0.00016085724842960957,
      "loss": 0.3827,
      "step": 1594
    },
    {
      "epoch": 0.5889955686853766,
      "grad_norm": 0.37990602850914,
      "learning_rate": 0.0001608326148540461,
      "loss": 0.4037,
      "step": 1595
    },
    {
      "epoch": 0.5893648449039882,
      "grad_norm": 0.30081379413604736,
      "learning_rate": 0.00016080798127848258,
      "loss": 0.3083,
      "step": 1596
    },
    {
      "epoch": 0.5897341211225997,
      "grad_norm": 0.2976696193218231,
      "learning_rate": 0.0001607833477029191,
      "loss": 0.3093,
      "step": 1597
    },
    {
      "epoch": 0.5901033973412112,
      "grad_norm": 0.25933101773262024,
      "learning_rate": 0.00016075871412735558,
      "loss": 0.2725,
      "step": 1598
    },
    {
      "epoch": 0.5904726735598228,
      "grad_norm": 0.27247390151023865,
      "learning_rate": 0.00016073408055179212,
      "loss": 0.2799,
      "step": 1599
    },
    {
      "epoch": 0.5908419497784343,
      "grad_norm": 0.30138272047042847,
      "learning_rate": 0.0001607094469762286,
      "loss": 0.3352,
      "step": 1600
    },
    {
      "epoch": 0.5908419497784343,
      "eval_loss": 0.30299845337867737,
      "eval_runtime": 5.8515,
      "eval_samples_per_second": 8.545,
      "eval_steps_per_second": 1.196,
      "step": 1600
    },
    {
      "epoch": 0.5912112259970458,
      "grad_norm": 0.2863950729370117,
      "learning_rate": 0.00016068481340066512,
      "loss": 0.3244,
      "step": 1601
    },
    {
      "epoch": 0.5915805022156573,
      "grad_norm": 0.28526681661605835,
      "learning_rate": 0.0001606601798251016,
      "loss": 0.3739,
      "step": 1602
    },
    {
      "epoch": 0.5919497784342689,
      "grad_norm": 0.32612308859825134,
      "learning_rate": 0.00016063554624953813,
      "loss": 0.3797,
      "step": 1603
    },
    {
      "epoch": 0.5923190546528804,
      "grad_norm": 0.2852473556995392,
      "learning_rate": 0.00016061091267397464,
      "loss": 0.3203,
      "step": 1604
    },
    {
      "epoch": 0.5926883308714919,
      "grad_norm": 0.23356491327285767,
      "learning_rate": 0.00016058627909841115,
      "loss": 0.2677,
      "step": 1605
    },
    {
      "epoch": 0.5930576070901034,
      "grad_norm": 0.27636459469795227,
      "learning_rate": 0.00016056164552284764,
      "loss": 0.2944,
      "step": 1606
    },
    {
      "epoch": 0.593426883308715,
      "grad_norm": 0.23538675904273987,
      "learning_rate": 0.00016053701194728416,
      "loss": 0.2815,
      "step": 1607
    },
    {
      "epoch": 0.5937961595273265,
      "grad_norm": 0.32497119903564453,
      "learning_rate": 0.00016051237837172067,
      "loss": 0.4065,
      "step": 1608
    },
    {
      "epoch": 0.5941654357459379,
      "grad_norm": 0.3353843092918396,
      "learning_rate": 0.00016048774479615719,
      "loss": 0.4053,
      "step": 1609
    },
    {
      "epoch": 0.5945347119645494,
      "grad_norm": 0.24563480913639069,
      "learning_rate": 0.00016046311122059367,
      "loss": 0.2794,
      "step": 1610
    },
    {
      "epoch": 0.594903988183161,
      "grad_norm": 0.31058967113494873,
      "learning_rate": 0.0001604384776450302,
      "loss": 0.3113,
      "step": 1611
    },
    {
      "epoch": 0.5952732644017725,
      "grad_norm": 0.2887929081916809,
      "learning_rate": 0.0001604138440694667,
      "loss": 0.3434,
      "step": 1612
    },
    {
      "epoch": 0.595642540620384,
      "grad_norm": 0.2597660422325134,
      "learning_rate": 0.00016038921049390322,
      "loss": 0.231,
      "step": 1613
    },
    {
      "epoch": 0.5960118168389956,
      "grad_norm": 0.3016369938850403,
      "learning_rate": 0.0001603645769183397,
      "loss": 0.3576,
      "step": 1614
    },
    {
      "epoch": 0.5963810930576071,
      "grad_norm": 0.37979385256767273,
      "learning_rate": 0.00016033994334277622,
      "loss": 0.3593,
      "step": 1615
    },
    {
      "epoch": 0.5967503692762186,
      "grad_norm": 0.2681538760662079,
      "learning_rate": 0.0001603153097672127,
      "loss": 0.2734,
      "step": 1616
    },
    {
      "epoch": 0.5971196454948301,
      "grad_norm": 0.2787245810031891,
      "learning_rate": 0.00016029067619164925,
      "loss": 0.3407,
      "step": 1617
    },
    {
      "epoch": 0.5974889217134417,
      "grad_norm": 0.39077043533325195,
      "learning_rate": 0.00016026604261608574,
      "loss": 0.4054,
      "step": 1618
    },
    {
      "epoch": 0.5978581979320532,
      "grad_norm": 0.29116880893707275,
      "learning_rate": 0.00016024140904052225,
      "loss": 0.3491,
      "step": 1619
    },
    {
      "epoch": 0.5982274741506647,
      "grad_norm": 0.32160866260528564,
      "learning_rate": 0.00016021677546495874,
      "loss": 0.3142,
      "step": 1620
    },
    {
      "epoch": 0.5985967503692762,
      "grad_norm": 0.29578158259391785,
      "learning_rate": 0.00016019214188939525,
      "loss": 0.3588,
      "step": 1621
    },
    {
      "epoch": 0.5989660265878878,
      "grad_norm": 0.2547626197338104,
      "learning_rate": 0.00016016750831383177,
      "loss": 0.3059,
      "step": 1622
    },
    {
      "epoch": 0.5993353028064993,
      "grad_norm": 0.3139692544937134,
      "learning_rate": 0.00016014287473826828,
      "loss": 0.3273,
      "step": 1623
    },
    {
      "epoch": 0.5997045790251108,
      "grad_norm": 0.2903187870979309,
      "learning_rate": 0.00016011824116270477,
      "loss": 0.3736,
      "step": 1624
    },
    {
      "epoch": 0.6000738552437223,
      "grad_norm": 0.2500844895839691,
      "learning_rate": 0.00016009360758714128,
      "loss": 0.3023,
      "step": 1625
    },
    {
      "epoch": 0.6004431314623339,
      "grad_norm": 0.29968592524528503,
      "learning_rate": 0.0001600689740115778,
      "loss": 0.324,
      "step": 1626
    },
    {
      "epoch": 0.6008124076809453,
      "grad_norm": 0.2452509105205536,
      "learning_rate": 0.00016004434043601431,
      "loss": 0.3263,
      "step": 1627
    },
    {
      "epoch": 0.6011816838995568,
      "grad_norm": 0.26588737964630127,
      "learning_rate": 0.0001600197068604508,
      "loss": 0.2953,
      "step": 1628
    },
    {
      "epoch": 0.6015509601181684,
      "grad_norm": 0.2977176010608673,
      "learning_rate": 0.0001599950732848873,
      "loss": 0.3184,
      "step": 1629
    },
    {
      "epoch": 0.6019202363367799,
      "grad_norm": 0.30278778076171875,
      "learning_rate": 0.0001599704397093238,
      "loss": 0.4025,
      "step": 1630
    },
    {
      "epoch": 0.6022895125553914,
      "grad_norm": 0.2819361090660095,
      "learning_rate": 0.00015994580613376032,
      "loss": 0.3347,
      "step": 1631
    },
    {
      "epoch": 0.6026587887740029,
      "grad_norm": 0.24064016342163086,
      "learning_rate": 0.00015992117255819683,
      "loss": 0.3209,
      "step": 1632
    },
    {
      "epoch": 0.6030280649926145,
      "grad_norm": 0.31922975182533264,
      "learning_rate": 0.00015989653898263332,
      "loss": 0.3373,
      "step": 1633
    },
    {
      "epoch": 0.603397341211226,
      "grad_norm": 0.35239550471305847,
      "learning_rate": 0.00015987190540706984,
      "loss": 0.3235,
      "step": 1634
    },
    {
      "epoch": 0.6037666174298375,
      "grad_norm": 0.35154789686203003,
      "learning_rate": 0.00015984727183150635,
      "loss": 0.371,
      "step": 1635
    },
    {
      "epoch": 0.604135893648449,
      "grad_norm": 0.2805372178554535,
      "learning_rate": 0.00015982263825594286,
      "loss": 0.316,
      "step": 1636
    },
    {
      "epoch": 0.6045051698670606,
      "grad_norm": 0.2914516031742096,
      "learning_rate": 0.00015979800468037935,
      "loss": 0.3622,
      "step": 1637
    },
    {
      "epoch": 0.6048744460856721,
      "grad_norm": 0.297642320394516,
      "learning_rate": 0.00015977337110481587,
      "loss": 0.3404,
      "step": 1638
    },
    {
      "epoch": 0.6052437223042836,
      "grad_norm": 0.32754257321357727,
      "learning_rate": 0.00015974873752925238,
      "loss": 0.3707,
      "step": 1639
    },
    {
      "epoch": 0.6056129985228951,
      "grad_norm": 0.2718541920185089,
      "learning_rate": 0.0001597241039536889,
      "loss": 0.3246,
      "step": 1640
    },
    {
      "epoch": 0.6059822747415067,
      "grad_norm": 0.28009459376335144,
      "learning_rate": 0.00015969947037812538,
      "loss": 0.2848,
      "step": 1641
    },
    {
      "epoch": 0.6063515509601182,
      "grad_norm": 0.27391892671585083,
      "learning_rate": 0.0001596748368025619,
      "loss": 0.2587,
      "step": 1642
    },
    {
      "epoch": 0.6067208271787297,
      "grad_norm": 0.25154897570610046,
      "learning_rate": 0.00015965020322699839,
      "loss": 0.2921,
      "step": 1643
    },
    {
      "epoch": 0.6070901033973413,
      "grad_norm": 0.30357787013053894,
      "learning_rate": 0.00015962556965143493,
      "loss": 0.3288,
      "step": 1644
    },
    {
      "epoch": 0.6074593796159528,
      "grad_norm": 0.3554220497608185,
      "learning_rate": 0.00015960093607587141,
      "loss": 0.4087,
      "step": 1645
    },
    {
      "epoch": 0.6078286558345642,
      "grad_norm": 0.29400530457496643,
      "learning_rate": 0.00015957630250030793,
      "loss": 0.3002,
      "step": 1646
    },
    {
      "epoch": 0.6081979320531757,
      "grad_norm": 0.25072628259658813,
      "learning_rate": 0.00015955166892474442,
      "loss": 0.2878,
      "step": 1647
    },
    {
      "epoch": 0.6085672082717873,
      "grad_norm": 0.24574296176433563,
      "learning_rate": 0.00015952703534918093,
      "loss": 0.306,
      "step": 1648
    },
    {
      "epoch": 0.6089364844903988,
      "grad_norm": 0.2616758346557617,
      "learning_rate": 0.00015950240177361745,
      "loss": 0.3238,
      "step": 1649
    },
    {
      "epoch": 0.6093057607090103,
      "grad_norm": 0.5619114637374878,
      "learning_rate": 0.00015947776819805396,
      "loss": 0.332,
      "step": 1650
    },
    {
      "epoch": 0.6093057607090103,
      "eval_loss": 0.3030269742012024,
      "eval_runtime": 5.8506,
      "eval_samples_per_second": 8.546,
      "eval_steps_per_second": 1.196,
      "step": 1650
    },
    {
      "epoch": 0.6096750369276218,
      "grad_norm": 0.2748061716556549,
      "learning_rate": 0.00015945313462249045,
      "loss": 0.326,
      "step": 1651
    },
    {
      "epoch": 0.6100443131462334,
      "grad_norm": 0.26775607466697693,
      "learning_rate": 0.00015942850104692696,
      "loss": 0.3089,
      "step": 1652
    },
    {
      "epoch": 0.6104135893648449,
      "grad_norm": 0.29094046354293823,
      "learning_rate": 0.00015940386747136348,
      "loss": 0.3442,
      "step": 1653
    },
    {
      "epoch": 0.6107828655834564,
      "grad_norm": 0.3499103784561157,
      "learning_rate": 0.0001593792338958,
      "loss": 0.3428,
      "step": 1654
    },
    {
      "epoch": 0.6111521418020679,
      "grad_norm": 0.29585933685302734,
      "learning_rate": 0.00015935460032023648,
      "loss": 0.3881,
      "step": 1655
    },
    {
      "epoch": 0.6115214180206795,
      "grad_norm": 0.28750079870224,
      "learning_rate": 0.000159329966744673,
      "loss": 0.3441,
      "step": 1656
    },
    {
      "epoch": 0.611890694239291,
      "grad_norm": 0.3632444441318512,
      "learning_rate": 0.00015930533316910948,
      "loss": 0.4613,
      "step": 1657
    },
    {
      "epoch": 0.6122599704579025,
      "grad_norm": 0.3101236820220947,
      "learning_rate": 0.00015928069959354602,
      "loss": 0.3227,
      "step": 1658
    },
    {
      "epoch": 0.6126292466765141,
      "grad_norm": 0.30292463302612305,
      "learning_rate": 0.0001592560660179825,
      "loss": 0.3794,
      "step": 1659
    },
    {
      "epoch": 0.6129985228951256,
      "grad_norm": 0.2677481472492218,
      "learning_rate": 0.00015923143244241903,
      "loss": 0.3185,
      "step": 1660
    },
    {
      "epoch": 0.6133677991137371,
      "grad_norm": 0.25809037685394287,
      "learning_rate": 0.0001592067988668555,
      "loss": 0.3089,
      "step": 1661
    },
    {
      "epoch": 0.6137370753323486,
      "grad_norm": 0.23848919570446014,
      "learning_rate": 0.00015918216529129203,
      "loss": 0.2741,
      "step": 1662
    },
    {
      "epoch": 0.6141063515509602,
      "grad_norm": 0.30430811643600464,
      "learning_rate": 0.00015915753171572854,
      "loss": 0.3688,
      "step": 1663
    },
    {
      "epoch": 0.6144756277695717,
      "grad_norm": 0.246963769197464,
      "learning_rate": 0.00015913289814016506,
      "loss": 0.2821,
      "step": 1664
    },
    {
      "epoch": 0.6148449039881831,
      "grad_norm": 0.24022459983825684,
      "learning_rate": 0.00015910826456460154,
      "loss": 0.3068,
      "step": 1665
    },
    {
      "epoch": 0.6152141802067946,
      "grad_norm": 0.2601327896118164,
      "learning_rate": 0.00015908363098903806,
      "loss": 0.2922,
      "step": 1666
    },
    {
      "epoch": 0.6155834564254062,
      "grad_norm": 0.27064478397369385,
      "learning_rate": 0.00015905899741347457,
      "loss": 0.3405,
      "step": 1667
    },
    {
      "epoch": 0.6159527326440177,
      "grad_norm": 0.3012557625770569,
      "learning_rate": 0.0001590343638379111,
      "loss": 0.3401,
      "step": 1668
    },
    {
      "epoch": 0.6163220088626292,
      "grad_norm": 0.237897127866745,
      "learning_rate": 0.00015900973026234758,
      "loss": 0.2843,
      "step": 1669
    },
    {
      "epoch": 0.6166912850812407,
      "grad_norm": 0.24375304579734802,
      "learning_rate": 0.0001589850966867841,
      "loss": 0.3108,
      "step": 1670
    },
    {
      "epoch": 0.6170605612998523,
      "grad_norm": 0.291055291891098,
      "learning_rate": 0.0001589604631112206,
      "loss": 0.3754,
      "step": 1671
    },
    {
      "epoch": 0.6174298375184638,
      "grad_norm": 0.23340968787670135,
      "learning_rate": 0.00015893582953565712,
      "loss": 0.3151,
      "step": 1672
    },
    {
      "epoch": 0.6177991137370753,
      "grad_norm": 0.25678062438964844,
      "learning_rate": 0.0001589111959600936,
      "loss": 0.3065,
      "step": 1673
    },
    {
      "epoch": 0.6181683899556869,
      "grad_norm": 0.27369391918182373,
      "learning_rate": 0.00015888656238453012,
      "loss": 0.2954,
      "step": 1674
    },
    {
      "epoch": 0.6185376661742984,
      "grad_norm": 0.2728053629398346,
      "learning_rate": 0.0001588619288089666,
      "loss": 0.331,
      "step": 1675
    },
    {
      "epoch": 0.6189069423929099,
      "grad_norm": 0.26507559418678284,
      "learning_rate": 0.00015883729523340315,
      "loss": 0.2763,
      "step": 1676
    },
    {
      "epoch": 0.6192762186115214,
      "grad_norm": 0.3048473000526428,
      "learning_rate": 0.00015881266165783964,
      "loss": 0.3407,
      "step": 1677
    },
    {
      "epoch": 0.619645494830133,
      "grad_norm": 0.2589951157569885,
      "learning_rate": 0.00015878802808227615,
      "loss": 0.3002,
      "step": 1678
    },
    {
      "epoch": 0.6200147710487445,
      "grad_norm": 0.2744160592556,
      "learning_rate": 0.00015876339450671264,
      "loss": 0.335,
      "step": 1679
    },
    {
      "epoch": 0.620384047267356,
      "grad_norm": 0.26012927293777466,
      "learning_rate": 0.00015873876093114916,
      "loss": 0.3154,
      "step": 1680
    },
    {
      "epoch": 0.6207533234859675,
      "grad_norm": 0.2780097424983978,
      "learning_rate": 0.00015871412735558567,
      "loss": 0.3,
      "step": 1681
    },
    {
      "epoch": 0.6211225997045791,
      "grad_norm": 0.20938009023666382,
      "learning_rate": 0.00015868949378002219,
      "loss": 0.2146,
      "step": 1682
    },
    {
      "epoch": 0.6214918759231906,
      "grad_norm": 0.2516339421272278,
      "learning_rate": 0.00015866486020445867,
      "loss": 0.2809,
      "step": 1683
    },
    {
      "epoch": 0.621861152141802,
      "grad_norm": 0.2812969386577606,
      "learning_rate": 0.0001586402266288952,
      "loss": 0.3003,
      "step": 1684
    },
    {
      "epoch": 0.6222304283604135,
      "grad_norm": 0.20179781317710876,
      "learning_rate": 0.0001586155930533317,
      "loss": 0.2153,
      "step": 1685
    },
    {
      "epoch": 0.6225997045790251,
      "grad_norm": 0.33972302079200745,
      "learning_rate": 0.00015859095947776822,
      "loss": 0.3614,
      "step": 1686
    },
    {
      "epoch": 0.6229689807976366,
      "grad_norm": 0.2498815804719925,
      "learning_rate": 0.0001585663259022047,
      "loss": 0.332,
      "step": 1687
    },
    {
      "epoch": 0.6233382570162481,
      "grad_norm": 0.30376482009887695,
      "learning_rate": 0.00015854169232664122,
      "loss": 0.3114,
      "step": 1688
    },
    {
      "epoch": 0.6237075332348597,
      "grad_norm": 0.2777147591114044,
      "learning_rate": 0.0001585170587510777,
      "loss": 0.3257,
      "step": 1689
    },
    {
      "epoch": 0.6240768094534712,
      "grad_norm": 0.28457579016685486,
      "learning_rate": 0.00015849242517551425,
      "loss": 0.3483,
      "step": 1690
    },
    {
      "epoch": 0.6244460856720827,
      "grad_norm": 0.3040483593940735,
      "learning_rate": 0.00015846779159995074,
      "loss": 0.3239,
      "step": 1691
    },
    {
      "epoch": 0.6248153618906942,
      "grad_norm": 0.2769504189491272,
      "learning_rate": 0.00015844315802438725,
      "loss": 0.3388,
      "step": 1692
    },
    {
      "epoch": 0.6251846381093058,
      "grad_norm": 0.2751236855983734,
      "learning_rate": 0.00015841852444882374,
      "loss": 0.3227,
      "step": 1693
    },
    {
      "epoch": 0.6255539143279173,
      "grad_norm": 0.2764040231704712,
      "learning_rate": 0.00015839389087326025,
      "loss": 0.3437,
      "step": 1694
    },
    {
      "epoch": 0.6259231905465288,
      "grad_norm": 0.25166937708854675,
      "learning_rate": 0.00015836925729769677,
      "loss": 0.2892,
      "step": 1695
    },
    {
      "epoch": 0.6262924667651403,
      "grad_norm": 0.307192325592041,
      "learning_rate": 0.00015834462372213328,
      "loss": 0.3501,
      "step": 1696
    },
    {
      "epoch": 0.6266617429837519,
      "grad_norm": 0.33956125378608704,
      "learning_rate": 0.00015831999014656977,
      "loss": 0.4298,
      "step": 1697
    },
    {
      "epoch": 0.6270310192023634,
      "grad_norm": 0.21702831983566284,
      "learning_rate": 0.00015829535657100628,
      "loss": 0.2942,
      "step": 1698
    },
    {
      "epoch": 0.6274002954209749,
      "grad_norm": 0.27251285314559937,
      "learning_rate": 0.0001582707229954428,
      "loss": 0.3085,
      "step": 1699
    },
    {
      "epoch": 0.6277695716395865,
      "grad_norm": 0.2970748245716095,
      "learning_rate": 0.0001582460894198793,
      "loss": 0.3112,
      "step": 1700
    },
    {
      "epoch": 0.6277695716395865,
      "eval_loss": 0.2985492944717407,
      "eval_runtime": 5.8512,
      "eval_samples_per_second": 8.545,
      "eval_steps_per_second": 1.196,
      "step": 1700
    },
    {
      "epoch": 0.628138847858198,
      "grad_norm": 0.32603204250335693,
      "learning_rate": 0.0001582214558443158,
      "loss": 0.3757,
      "step": 1701
    },
    {
      "epoch": 0.6285081240768094,
      "grad_norm": 0.3002977967262268,
      "learning_rate": 0.00015819682226875232,
      "loss": 0.3623,
      "step": 1702
    },
    {
      "epoch": 0.6288774002954209,
      "grad_norm": 0.2899661958217621,
      "learning_rate": 0.0001581721886931888,
      "loss": 0.321,
      "step": 1703
    },
    {
      "epoch": 0.6292466765140325,
      "grad_norm": 0.29438790678977966,
      "learning_rate": 0.00015814755511762534,
      "loss": 0.3842,
      "step": 1704
    },
    {
      "epoch": 0.629615952732644,
      "grad_norm": 0.28124135732650757,
      "learning_rate": 0.00015812292154206183,
      "loss": 0.3379,
      "step": 1705
    },
    {
      "epoch": 0.6299852289512555,
      "grad_norm": 0.29183897376060486,
      "learning_rate": 0.00015809828796649835,
      "loss": 0.3267,
      "step": 1706
    },
    {
      "epoch": 0.630354505169867,
      "grad_norm": 0.30754607915878296,
      "learning_rate": 0.00015807365439093483,
      "loss": 0.382,
      "step": 1707
    },
    {
      "epoch": 0.6307237813884786,
      "grad_norm": 0.2588324546813965,
      "learning_rate": 0.00015804902081537135,
      "loss": 0.3673,
      "step": 1708
    },
    {
      "epoch": 0.6310930576070901,
      "grad_norm": 0.3831642270088196,
      "learning_rate": 0.00015802438723980786,
      "loss": 0.3531,
      "step": 1709
    },
    {
      "epoch": 0.6314623338257016,
      "grad_norm": 0.24796739220619202,
      "learning_rate": 0.00015799975366424438,
      "loss": 0.2398,
      "step": 1710
    },
    {
      "epoch": 0.6318316100443131,
      "grad_norm": 0.27409496903419495,
      "learning_rate": 0.00015797512008868087,
      "loss": 0.3508,
      "step": 1711
    },
    {
      "epoch": 0.6322008862629247,
      "grad_norm": 0.3816826641559601,
      "learning_rate": 0.00015795048651311738,
      "loss": 0.3976,
      "step": 1712
    },
    {
      "epoch": 0.6325701624815362,
      "grad_norm": 0.26184147596359253,
      "learning_rate": 0.0001579258529375539,
      "loss": 0.3316,
      "step": 1713
    },
    {
      "epoch": 0.6329394387001477,
      "grad_norm": 0.24922801554203033,
      "learning_rate": 0.0001579012193619904,
      "loss": 0.3003,
      "step": 1714
    },
    {
      "epoch": 0.6333087149187593,
      "grad_norm": 0.3036719560623169,
      "learning_rate": 0.0001578765857864269,
      "loss": 0.3311,
      "step": 1715
    },
    {
      "epoch": 0.6336779911373708,
      "grad_norm": 0.2686334550380707,
      "learning_rate": 0.0001578519522108634,
      "loss": 0.3117,
      "step": 1716
    },
    {
      "epoch": 0.6340472673559823,
      "grad_norm": 0.3197580575942993,
      "learning_rate": 0.00015782731863529993,
      "loss": 0.3464,
      "step": 1717
    },
    {
      "epoch": 0.6344165435745938,
      "grad_norm": 0.2406512051820755,
      "learning_rate": 0.00015780268505973644,
      "loss": 0.2651,
      "step": 1718
    },
    {
      "epoch": 0.6347858197932054,
      "grad_norm": 0.29690489172935486,
      "learning_rate": 0.00015777805148417293,
      "loss": 0.4247,
      "step": 1719
    },
    {
      "epoch": 0.6351550960118169,
      "grad_norm": 0.28856369853019714,
      "learning_rate": 0.00015775341790860944,
      "loss": 0.3043,
      "step": 1720
    },
    {
      "epoch": 0.6355243722304283,
      "grad_norm": 0.27125653624534607,
      "learning_rate": 0.00015772878433304593,
      "loss": 0.2969,
      "step": 1721
    },
    {
      "epoch": 0.6358936484490398,
      "grad_norm": 0.23672378063201904,
      "learning_rate": 0.00015770415075748247,
      "loss": 0.2979,
      "step": 1722
    },
    {
      "epoch": 0.6362629246676514,
      "grad_norm": 0.2650850713253021,
      "learning_rate": 0.00015767951718191896,
      "loss": 0.2636,
      "step": 1723
    },
    {
      "epoch": 0.6366322008862629,
      "grad_norm": 0.22631783783435822,
      "learning_rate": 0.00015765488360635547,
      "loss": 0.2606,
      "step": 1724
    },
    {
      "epoch": 0.6370014771048744,
      "grad_norm": 0.28124675154685974,
      "learning_rate": 0.00015763025003079196,
      "loss": 0.2768,
      "step": 1725
    },
    {
      "epoch": 0.6373707533234859,
      "grad_norm": 0.26289159059524536,
      "learning_rate": 0.00015760561645522848,
      "loss": 0.3184,
      "step": 1726
    },
    {
      "epoch": 0.6377400295420975,
      "grad_norm": 0.26568078994750977,
      "learning_rate": 0.000157580982879665,
      "loss": 0.2835,
      "step": 1727
    },
    {
      "epoch": 0.638109305760709,
      "grad_norm": 0.26100102066993713,
      "learning_rate": 0.0001575563493041015,
      "loss": 0.3013,
      "step": 1728
    },
    {
      "epoch": 0.6384785819793205,
      "grad_norm": 0.2617495656013489,
      "learning_rate": 0.000157531715728538,
      "loss": 0.3099,
      "step": 1729
    },
    {
      "epoch": 0.6388478581979321,
      "grad_norm": 0.24113066494464874,
      "learning_rate": 0.0001575070821529745,
      "loss": 0.2601,
      "step": 1730
    },
    {
      "epoch": 0.6392171344165436,
      "grad_norm": 0.2747572958469391,
      "learning_rate": 0.00015748244857741102,
      "loss": 0.3056,
      "step": 1731
    },
    {
      "epoch": 0.6395864106351551,
      "grad_norm": 0.23434069752693176,
      "learning_rate": 0.00015745781500184754,
      "loss": 0.3247,
      "step": 1732
    },
    {
      "epoch": 0.6399556868537666,
      "grad_norm": 0.2345058023929596,
      "learning_rate": 0.00015743318142628403,
      "loss": 0.3043,
      "step": 1733
    },
    {
      "epoch": 0.6403249630723782,
      "grad_norm": 0.29427266120910645,
      "learning_rate": 0.00015740854785072054,
      "loss": 0.3494,
      "step": 1734
    },
    {
      "epoch": 0.6406942392909897,
      "grad_norm": 0.2852862775325775,
      "learning_rate": 0.00015738391427515703,
      "loss": 0.3587,
      "step": 1735
    },
    {
      "epoch": 0.6410635155096012,
      "grad_norm": 0.2725900113582611,
      "learning_rate": 0.00015735928069959357,
      "loss": 0.2966,
      "step": 1736
    },
    {
      "epoch": 0.6414327917282127,
      "grad_norm": 0.30448734760284424,
      "learning_rate": 0.00015733464712403006,
      "loss": 0.3303,
      "step": 1737
    },
    {
      "epoch": 0.6418020679468243,
      "grad_norm": 0.25536197423934937,
      "learning_rate": 0.00015731001354846657,
      "loss": 0.3302,
      "step": 1738
    },
    {
      "epoch": 0.6421713441654358,
      "grad_norm": 0.31083613634109497,
      "learning_rate": 0.00015728537997290306,
      "loss": 0.3966,
      "step": 1739
    },
    {
      "epoch": 0.6425406203840472,
      "grad_norm": 0.2993643879890442,
      "learning_rate": 0.00015726074639733957,
      "loss": 0.3034,
      "step": 1740
    },
    {
      "epoch": 0.6429098966026587,
      "grad_norm": 0.31519269943237305,
      "learning_rate": 0.0001572361128217761,
      "loss": 0.2813,
      "step": 1741
    },
    {
      "epoch": 0.6432791728212703,
      "grad_norm": 0.25670289993286133,
      "learning_rate": 0.0001572114792462126,
      "loss": 0.2646,
      "step": 1742
    },
    {
      "epoch": 0.6436484490398818,
      "grad_norm": 0.25279560685157776,
      "learning_rate": 0.0001571868456706491,
      "loss": 0.2919,
      "step": 1743
    },
    {
      "epoch": 0.6440177252584933,
      "grad_norm": 0.2225867360830307,
      "learning_rate": 0.0001571622120950856,
      "loss": 0.2817,
      "step": 1744
    },
    {
      "epoch": 0.6443870014771049,
      "grad_norm": 0.2555961608886719,
      "learning_rate": 0.00015713757851952212,
      "loss": 0.3269,
      "step": 1745
    },
    {
      "epoch": 0.6447562776957164,
      "grad_norm": 0.27634233236312866,
      "learning_rate": 0.00015711294494395863,
      "loss": 0.3494,
      "step": 1746
    },
    {
      "epoch": 0.6451255539143279,
      "grad_norm": 0.2750725746154785,
      "learning_rate": 0.00015708831136839512,
      "loss": 0.3578,
      "step": 1747
    },
    {
      "epoch": 0.6454948301329394,
      "grad_norm": 0.30054372549057007,
      "learning_rate": 0.00015706367779283164,
      "loss": 0.3006,
      "step": 1748
    },
    {
      "epoch": 0.645864106351551,
      "grad_norm": 0.25382721424102783,
      "learning_rate": 0.00015703904421726815,
      "loss": 0.2913,
      "step": 1749
    },
    {
      "epoch": 0.6462333825701625,
      "grad_norm": 0.2811352014541626,
      "learning_rate": 0.00015701441064170467,
      "loss": 0.3342,
      "step": 1750
    },
    {
      "epoch": 0.6462333825701625,
      "eval_loss": 0.29770615696907043,
      "eval_runtime": 5.8464,
      "eval_samples_per_second": 8.552,
      "eval_steps_per_second": 1.197,
      "step": 1750
    },
    {
      "epoch": 0.646602658788774,
      "grad_norm": 0.27353596687316895,
      "learning_rate": 0.00015698977706614115,
      "loss": 0.2868,
      "step": 1751
    },
    {
      "epoch": 0.6469719350073855,
      "grad_norm": 0.3044775128364563,
      "learning_rate": 0.00015696514349057767,
      "loss": 0.3465,
      "step": 1752
    },
    {
      "epoch": 0.6473412112259971,
      "grad_norm": 0.26236966252326965,
      "learning_rate": 0.00015694050991501416,
      "loss": 0.2984,
      "step": 1753
    },
    {
      "epoch": 0.6477104874446086,
      "grad_norm": 0.26648154854774475,
      "learning_rate": 0.0001569158763394507,
      "loss": 0.25,
      "step": 1754
    },
    {
      "epoch": 0.6480797636632201,
      "grad_norm": 0.22502334415912628,
      "learning_rate": 0.00015689124276388718,
      "loss": 0.3028,
      "step": 1755
    },
    {
      "epoch": 0.6484490398818316,
      "grad_norm": 0.3449307382106781,
      "learning_rate": 0.0001568666091883237,
      "loss": 0.3149,
      "step": 1756
    },
    {
      "epoch": 0.6488183161004432,
      "grad_norm": 0.2570543885231018,
      "learning_rate": 0.0001568419756127602,
      "loss": 0.2916,
      "step": 1757
    },
    {
      "epoch": 0.6491875923190547,
      "grad_norm": 0.30169492959976196,
      "learning_rate": 0.0001568173420371967,
      "loss": 0.3588,
      "step": 1758
    },
    {
      "epoch": 0.6495568685376661,
      "grad_norm": 0.2638286054134369,
      "learning_rate": 0.00015679270846163322,
      "loss": 0.3143,
      "step": 1759
    },
    {
      "epoch": 0.6499261447562777,
      "grad_norm": 0.2750193476676941,
      "learning_rate": 0.00015676807488606973,
      "loss": 0.3078,
      "step": 1760
    },
    {
      "epoch": 0.6502954209748892,
      "grad_norm": 0.25509074330329895,
      "learning_rate": 0.00015674344131050622,
      "loss": 0.2535,
      "step": 1761
    },
    {
      "epoch": 0.6506646971935007,
      "grad_norm": 0.23687632381916046,
      "learning_rate": 0.00015671880773494273,
      "loss": 0.2834,
      "step": 1762
    },
    {
      "epoch": 0.6510339734121122,
      "grad_norm": 0.25632792711257935,
      "learning_rate": 0.00015669417415937925,
      "loss": 0.3015,
      "step": 1763
    },
    {
      "epoch": 0.6514032496307238,
      "grad_norm": 0.2542288601398468,
      "learning_rate": 0.00015666954058381576,
      "loss": 0.2827,
      "step": 1764
    },
    {
      "epoch": 0.6517725258493353,
      "grad_norm": 0.3169395923614502,
      "learning_rate": 0.00015664490700825225,
      "loss": 0.3251,
      "step": 1765
    },
    {
      "epoch": 0.6521418020679468,
      "grad_norm": 0.28818562626838684,
      "learning_rate": 0.00015662027343268876,
      "loss": 0.3101,
      "step": 1766
    },
    {
      "epoch": 0.6525110782865583,
      "grad_norm": 0.2993204891681671,
      "learning_rate": 0.00015659563985712525,
      "loss": 0.336,
      "step": 1767
    },
    {
      "epoch": 0.6528803545051699,
      "grad_norm": 0.2634614109992981,
      "learning_rate": 0.0001565710062815618,
      "loss": 0.3328,
      "step": 1768
    },
    {
      "epoch": 0.6532496307237814,
      "grad_norm": 0.29314500093460083,
      "learning_rate": 0.00015654637270599828,
      "loss": 0.38,
      "step": 1769
    },
    {
      "epoch": 0.6536189069423929,
      "grad_norm": 0.24318912625312805,
      "learning_rate": 0.0001565217391304348,
      "loss": 0.2596,
      "step": 1770
    },
    {
      "epoch": 0.6539881831610044,
      "grad_norm": 0.28800836205482483,
      "learning_rate": 0.00015649710555487128,
      "loss": 0.3233,
      "step": 1771
    },
    {
      "epoch": 0.654357459379616,
      "grad_norm": 0.28744104504585266,
      "learning_rate": 0.0001564724719793078,
      "loss": 0.3106,
      "step": 1772
    },
    {
      "epoch": 0.6547267355982275,
      "grad_norm": 0.2733234465122223,
      "learning_rate": 0.0001564478384037443,
      "loss": 0.3522,
      "step": 1773
    },
    {
      "epoch": 0.655096011816839,
      "grad_norm": 0.3021034300327301,
      "learning_rate": 0.00015642320482818083,
      "loss": 0.3618,
      "step": 1774
    },
    {
      "epoch": 0.6554652880354506,
      "grad_norm": 0.28585657477378845,
      "learning_rate": 0.00015639857125261731,
      "loss": 0.325,
      "step": 1775
    },
    {
      "epoch": 0.6558345642540621,
      "grad_norm": 0.3492552936077118,
      "learning_rate": 0.00015637393767705383,
      "loss": 0.3431,
      "step": 1776
    },
    {
      "epoch": 0.6562038404726735,
      "grad_norm": 0.26289793848991394,
      "learning_rate": 0.00015634930410149034,
      "loss": 0.2999,
      "step": 1777
    },
    {
      "epoch": 0.656573116691285,
      "grad_norm": 0.2919107675552368,
      "learning_rate": 0.00015632467052592686,
      "loss": 0.2475,
      "step": 1778
    },
    {
      "epoch": 0.6569423929098966,
      "grad_norm": 0.2998773455619812,
      "learning_rate": 0.00015630003695036335,
      "loss": 0.3243,
      "step": 1779
    },
    {
      "epoch": 0.6573116691285081,
      "grad_norm": 0.30049073696136475,
      "learning_rate": 0.00015627540337479986,
      "loss": 0.3504,
      "step": 1780
    },
    {
      "epoch": 0.6576809453471196,
      "grad_norm": 0.2768280506134033,
      "learning_rate": 0.00015625076979923638,
      "loss": 0.3045,
      "step": 1781
    },
    {
      "epoch": 0.6580502215657311,
      "grad_norm": 0.2078462392091751,
      "learning_rate": 0.0001562261362236729,
      "loss": 0.2303,
      "step": 1782
    },
    {
      "epoch": 0.6584194977843427,
      "grad_norm": 0.3140887916088104,
      "learning_rate": 0.00015620150264810938,
      "loss": 0.3731,
      "step": 1783
    },
    {
      "epoch": 0.6587887740029542,
      "grad_norm": 0.23740491271018982,
      "learning_rate": 0.0001561768690725459,
      "loss": 0.2617,
      "step": 1784
    },
    {
      "epoch": 0.6591580502215657,
      "grad_norm": 0.38925233483314514,
      "learning_rate": 0.00015615223549698238,
      "loss": 0.416,
      "step": 1785
    },
    {
      "epoch": 0.6595273264401772,
      "grad_norm": 0.2521016001701355,
      "learning_rate": 0.00015612760192141892,
      "loss": 0.2772,
      "step": 1786
    },
    {
      "epoch": 0.6598966026587888,
      "grad_norm": 0.2719894349575043,
      "learning_rate": 0.0001561029683458554,
      "loss": 0.309,
      "step": 1787
    },
    {
      "epoch": 0.6602658788774003,
      "grad_norm": 0.28100425004959106,
      "learning_rate": 0.00015607833477029192,
      "loss": 0.3362,
      "step": 1788
    },
    {
      "epoch": 0.6606351550960118,
      "grad_norm": 0.2446250170469284,
      "learning_rate": 0.0001560537011947284,
      "loss": 0.2647,
      "step": 1789
    },
    {
      "epoch": 0.6610044313146234,
      "grad_norm": 0.28099167346954346,
      "learning_rate": 0.00015602906761916493,
      "loss": 0.3005,
      "step": 1790
    },
    {
      "epoch": 0.6613737075332349,
      "grad_norm": 0.23360125720500946,
      "learning_rate": 0.00015600443404360144,
      "loss": 0.2445,
      "step": 1791
    },
    {
      "epoch": 0.6617429837518464,
      "grad_norm": 0.28586718440055847,
      "learning_rate": 0.00015597980046803796,
      "loss": 0.3461,
      "step": 1792
    },
    {
      "epoch": 0.6621122599704579,
      "grad_norm": 0.2865604758262634,
      "learning_rate": 0.00015595516689247444,
      "loss": 0.3345,
      "step": 1793
    },
    {
      "epoch": 0.6624815361890695,
      "grad_norm": 0.26880353689193726,
      "learning_rate": 0.00015593053331691096,
      "loss": 0.2974,
      "step": 1794
    },
    {
      "epoch": 0.662850812407681,
      "grad_norm": 0.24530526995658875,
      "learning_rate": 0.00015590589974134747,
      "loss": 0.3113,
      "step": 1795
    },
    {
      "epoch": 0.6632200886262924,
      "grad_norm": 0.29964113235473633,
      "learning_rate": 0.000155881266165784,
      "loss": 0.3239,
      "step": 1796
    },
    {
      "epoch": 0.6635893648449039,
      "grad_norm": 0.24047644436359406,
      "learning_rate": 0.00015585663259022047,
      "loss": 0.2807,
      "step": 1797
    },
    {
      "epoch": 0.6639586410635155,
      "grad_norm": 0.2512739896774292,
      "learning_rate": 0.000155831999014657,
      "loss": 0.3065,
      "step": 1798
    },
    {
      "epoch": 0.664327917282127,
      "grad_norm": 0.2920621633529663,
      "learning_rate": 0.00015580736543909348,
      "loss": 0.3676,
      "step": 1799
    },
    {
      "epoch": 0.6646971935007385,
      "grad_norm": 0.2532987892627716,
      "learning_rate": 0.00015578273186353002,
      "loss": 0.268,
      "step": 1800
    },
    {
      "epoch": 0.6646971935007385,
      "eval_loss": 0.2948981523513794,
      "eval_runtime": 5.8636,
      "eval_samples_per_second": 8.527,
      "eval_steps_per_second": 1.194,
      "step": 1800
    },
    {
      "epoch": 0.6650664697193501,
      "grad_norm": 0.3084225356578827,
      "learning_rate": 0.0001557580982879665,
      "loss": 0.3474,
      "step": 1801
    },
    {
      "epoch": 0.6654357459379616,
      "grad_norm": 0.2657549977302551,
      "learning_rate": 0.00015573346471240302,
      "loss": 0.3271,
      "step": 1802
    },
    {
      "epoch": 0.6658050221565731,
      "grad_norm": 0.29108577966690063,
      "learning_rate": 0.0001557088311368395,
      "loss": 0.3392,
      "step": 1803
    },
    {
      "epoch": 0.6661742983751846,
      "grad_norm": 0.26664069294929504,
      "learning_rate": 0.00015568419756127602,
      "loss": 0.3126,
      "step": 1804
    },
    {
      "epoch": 0.6665435745937962,
      "grad_norm": 0.2917342782020569,
      "learning_rate": 0.00015565956398571254,
      "loss": 0.3592,
      "step": 1805
    },
    {
      "epoch": 0.6669128508124077,
      "grad_norm": 0.2624189853668213,
      "learning_rate": 0.00015563493041014905,
      "loss": 0.297,
      "step": 1806
    },
    {
      "epoch": 0.6672821270310192,
      "grad_norm": 0.29515746235847473,
      "learning_rate": 0.00015561029683458554,
      "loss": 0.317,
      "step": 1807
    },
    {
      "epoch": 0.6676514032496307,
      "grad_norm": 0.3128635287284851,
      "learning_rate": 0.00015558566325902205,
      "loss": 0.3551,
      "step": 1808
    },
    {
      "epoch": 0.6680206794682423,
      "grad_norm": 0.2330590784549713,
      "learning_rate": 0.00015556102968345857,
      "loss": 0.2613,
      "step": 1809
    },
    {
      "epoch": 0.6683899556868538,
      "grad_norm": 0.2911098003387451,
      "learning_rate": 0.00015553639610789508,
      "loss": 0.3516,
      "step": 1810
    },
    {
      "epoch": 0.6687592319054653,
      "grad_norm": 0.3263610601425171,
      "learning_rate": 0.00015551176253233157,
      "loss": 0.3869,
      "step": 1811
    },
    {
      "epoch": 0.6691285081240768,
      "grad_norm": 0.32457396388053894,
      "learning_rate": 0.00015548712895676809,
      "loss": 0.407,
      "step": 1812
    },
    {
      "epoch": 0.6694977843426884,
      "grad_norm": 0.2424752414226532,
      "learning_rate": 0.0001554624953812046,
      "loss": 0.3011,
      "step": 1813
    },
    {
      "epoch": 0.6698670605612999,
      "grad_norm": 0.29370442032814026,
      "learning_rate": 0.00015543786180564111,
      "loss": 0.3433,
      "step": 1814
    },
    {
      "epoch": 0.6702363367799113,
      "grad_norm": 0.30061599612236023,
      "learning_rate": 0.0001554132282300776,
      "loss": 0.3303,
      "step": 1815
    },
    {
      "epoch": 0.670605612998523,
      "grad_norm": 0.2254580557346344,
      "learning_rate": 0.00015538859465451412,
      "loss": 0.2378,
      "step": 1816
    },
    {
      "epoch": 0.6709748892171344,
      "grad_norm": 0.25725165009498596,
      "learning_rate": 0.0001553639610789506,
      "loss": 0.2848,
      "step": 1817
    },
    {
      "epoch": 0.6713441654357459,
      "grad_norm": 0.27629512548446655,
      "learning_rate": 0.00015533932750338715,
      "loss": 0.3425,
      "step": 1818
    },
    {
      "epoch": 0.6717134416543574,
      "grad_norm": 0.27350887656211853,
      "learning_rate": 0.00015531469392782363,
      "loss": 0.3021,
      "step": 1819
    },
    {
      "epoch": 0.672082717872969,
      "grad_norm": 0.28401410579681396,
      "learning_rate": 0.00015529006035226015,
      "loss": 0.2829,
      "step": 1820
    },
    {
      "epoch": 0.6724519940915805,
      "grad_norm": 0.21475425362586975,
      "learning_rate": 0.00015526542677669664,
      "loss": 0.2587,
      "step": 1821
    },
    {
      "epoch": 0.672821270310192,
      "grad_norm": 0.28110092878341675,
      "learning_rate": 0.00015524079320113315,
      "loss": 0.342,
      "step": 1822
    },
    {
      "epoch": 0.6731905465288035,
      "grad_norm": 0.2703840136528015,
      "learning_rate": 0.00015521615962556967,
      "loss": 0.2876,
      "step": 1823
    },
    {
      "epoch": 0.6735598227474151,
      "grad_norm": 0.29484739899635315,
      "learning_rate": 0.00015519152605000618,
      "loss": 0.32,
      "step": 1824
    },
    {
      "epoch": 0.6739290989660266,
      "grad_norm": 0.2943943440914154,
      "learning_rate": 0.00015516689247444267,
      "loss": 0.3942,
      "step": 1825
    },
    {
      "epoch": 0.6742983751846381,
      "grad_norm": 0.24084927141666412,
      "learning_rate": 0.00015514225889887918,
      "loss": 0.2797,
      "step": 1826
    },
    {
      "epoch": 0.6746676514032496,
      "grad_norm": 0.2672373354434967,
      "learning_rate": 0.0001551176253233157,
      "loss": 0.345,
      "step": 1827
    },
    {
      "epoch": 0.6750369276218612,
      "grad_norm": 0.32839083671569824,
      "learning_rate": 0.0001550929917477522,
      "loss": 0.3829,
      "step": 1828
    },
    {
      "epoch": 0.6754062038404727,
      "grad_norm": 0.3239312767982483,
      "learning_rate": 0.0001550683581721887,
      "loss": 0.3548,
      "step": 1829
    },
    {
      "epoch": 0.6757754800590842,
      "grad_norm": 0.24574360251426697,
      "learning_rate": 0.00015504372459662521,
      "loss": 0.2696,
      "step": 1830
    },
    {
      "epoch": 0.6761447562776958,
      "grad_norm": 0.3067609965801239,
      "learning_rate": 0.0001550190910210617,
      "loss": 0.357,
      "step": 1831
    },
    {
      "epoch": 0.6765140324963073,
      "grad_norm": 0.2867920994758606,
      "learning_rate": 0.00015499445744549824,
      "loss": 0.3265,
      "step": 1832
    },
    {
      "epoch": 0.6768833087149188,
      "grad_norm": 0.23799139261245728,
      "learning_rate": 0.00015496982386993473,
      "loss": 0.3095,
      "step": 1833
    },
    {
      "epoch": 0.6772525849335302,
      "grad_norm": 0.3008800745010376,
      "learning_rate": 0.00015494519029437125,
      "loss": 0.3585,
      "step": 1834
    },
    {
      "epoch": 0.6776218611521418,
      "grad_norm": 0.2254391759634018,
      "learning_rate": 0.00015492055671880773,
      "loss": 0.2715,
      "step": 1835
    },
    {
      "epoch": 0.6779911373707533,
      "grad_norm": 0.2766897976398468,
      "learning_rate": 0.00015489592314324425,
      "loss": 0.2888,
      "step": 1836
    },
    {
      "epoch": 0.6783604135893648,
      "grad_norm": 0.25781330466270447,
      "learning_rate": 0.00015487128956768076,
      "loss": 0.2988,
      "step": 1837
    },
    {
      "epoch": 0.6787296898079763,
      "grad_norm": 0.2826957106590271,
      "learning_rate": 0.00015484665599211728,
      "loss": 0.3256,
      "step": 1838
    },
    {
      "epoch": 0.6790989660265879,
      "grad_norm": 0.32387325167655945,
      "learning_rate": 0.00015482202241655376,
      "loss": 0.3751,
      "step": 1839
    },
    {
      "epoch": 0.6794682422451994,
      "grad_norm": 0.23880648612976074,
      "learning_rate": 0.00015479738884099028,
      "loss": 0.256,
      "step": 1840
    },
    {
      "epoch": 0.6798375184638109,
      "grad_norm": 0.2831343710422516,
      "learning_rate": 0.0001547727552654268,
      "loss": 0.2981,
      "step": 1841
    },
    {
      "epoch": 0.6802067946824224,
      "grad_norm": 0.2651851177215576,
      "learning_rate": 0.0001547481216898633,
      "loss": 0.3318,
      "step": 1842
    },
    {
      "epoch": 0.680576070901034,
      "grad_norm": 0.28197112679481506,
      "learning_rate": 0.0001547234881142998,
      "loss": 0.3174,
      "step": 1843
    },
    {
      "epoch": 0.6809453471196455,
      "grad_norm": 0.30436182022094727,
      "learning_rate": 0.0001546988545387363,
      "loss": 0.3748,
      "step": 1844
    },
    {
      "epoch": 0.681314623338257,
      "grad_norm": 0.300368994474411,
      "learning_rate": 0.0001546742209631728,
      "loss": 0.3496,
      "step": 1845
    },
    {
      "epoch": 0.6816838995568686,
      "grad_norm": 0.2905513048171997,
      "learning_rate": 0.00015464958738760934,
      "loss": 0.3193,
      "step": 1846
    },
    {
      "epoch": 0.6820531757754801,
      "grad_norm": 0.3128775358200073,
      "learning_rate": 0.00015462495381204583,
      "loss": 0.3175,
      "step": 1847
    },
    {
      "epoch": 0.6824224519940916,
      "grad_norm": 0.31336894631385803,
      "learning_rate": 0.00015460032023648234,
      "loss": 0.359,
      "step": 1848
    },
    {
      "epoch": 0.6827917282127031,
      "grad_norm": 0.33619949221611023,
      "learning_rate": 0.00015457568666091883,
      "loss": 0.3467,
      "step": 1849
    },
    {
      "epoch": 0.6831610044313147,
      "grad_norm": 0.3280799090862274,
      "learning_rate": 0.00015455105308535537,
      "loss": 0.3398,
      "step": 1850
    },
    {
      "epoch": 0.6831610044313147,
      "eval_loss": 0.2983386218547821,
      "eval_runtime": 5.8536,
      "eval_samples_per_second": 8.542,
      "eval_steps_per_second": 1.196,
      "step": 1850
    },
    {
      "epoch": 0.6835302806499262,
      "grad_norm": 0.33054792881011963,
      "learning_rate": 0.00015452641950979186,
      "loss": 0.353,
      "step": 1851
    },
    {
      "epoch": 0.6838995568685377,
      "grad_norm": 0.2995906472206116,
      "learning_rate": 0.00015450178593422837,
      "loss": 0.3251,
      "step": 1852
    },
    {
      "epoch": 0.6842688330871491,
      "grad_norm": 0.2561552822589874,
      "learning_rate": 0.00015447715235866486,
      "loss": 0.2844,
      "step": 1853
    },
    {
      "epoch": 0.6846381093057607,
      "grad_norm": 0.22382104396820068,
      "learning_rate": 0.00015445251878310138,
      "loss": 0.2641,
      "step": 1854
    },
    {
      "epoch": 0.6850073855243722,
      "grad_norm": 0.2484665811061859,
      "learning_rate": 0.0001544278852075379,
      "loss": 0.2528,
      "step": 1855
    },
    {
      "epoch": 0.6853766617429837,
      "grad_norm": 0.29215025901794434,
      "learning_rate": 0.0001544032516319744,
      "loss": 0.3128,
      "step": 1856
    },
    {
      "epoch": 0.6857459379615952,
      "grad_norm": 0.29227593541145325,
      "learning_rate": 0.0001543786180564109,
      "loss": 0.3142,
      "step": 1857
    },
    {
      "epoch": 0.6861152141802068,
      "grad_norm": 0.32659927010536194,
      "learning_rate": 0.0001543539844808474,
      "loss": 0.3625,
      "step": 1858
    },
    {
      "epoch": 0.6864844903988183,
      "grad_norm": 0.37861353158950806,
      "learning_rate": 0.00015432935090528392,
      "loss": 0.3605,
      "step": 1859
    },
    {
      "epoch": 0.6868537666174298,
      "grad_norm": 0.2864838242530823,
      "learning_rate": 0.00015430471732972044,
      "loss": 0.2972,
      "step": 1860
    },
    {
      "epoch": 0.6872230428360414,
      "grad_norm": 0.37199172377586365,
      "learning_rate": 0.00015428008375415692,
      "loss": 0.4036,
      "step": 1861
    },
    {
      "epoch": 0.6875923190546529,
      "grad_norm": 0.3137976825237274,
      "learning_rate": 0.0001542554501785934,
      "loss": 0.3722,
      "step": 1862
    },
    {
      "epoch": 0.6879615952732644,
      "grad_norm": 0.2716263234615326,
      "learning_rate": 0.00015423081660302993,
      "loss": 0.3142,
      "step": 1863
    },
    {
      "epoch": 0.6883308714918759,
      "grad_norm": 0.25997471809387207,
      "learning_rate": 0.00015420618302746644,
      "loss": 0.2782,
      "step": 1864
    },
    {
      "epoch": 0.6887001477104875,
      "grad_norm": 0.2467002421617508,
      "learning_rate": 0.00015418154945190295,
      "loss": 0.2902,
      "step": 1865
    },
    {
      "epoch": 0.689069423929099,
      "grad_norm": 0.3040105700492859,
      "learning_rate": 0.00015415691587633944,
      "loss": 0.3599,
      "step": 1866
    },
    {
      "epoch": 0.6894387001477105,
      "grad_norm": 0.27652984857559204,
      "learning_rate": 0.00015413228230077596,
      "loss": 0.3071,
      "step": 1867
    },
    {
      "epoch": 0.689807976366322,
      "grad_norm": 0.31874772906303406,
      "learning_rate": 0.00015410764872521247,
      "loss": 0.3063,
      "step": 1868
    },
    {
      "epoch": 0.6901772525849336,
      "grad_norm": 0.3031354248523712,
      "learning_rate": 0.00015408301514964899,
      "loss": 0.3321,
      "step": 1869
    },
    {
      "epoch": 0.6905465288035451,
      "grad_norm": 0.27512142062187195,
      "learning_rate": 0.00015405838157408547,
      "loss": 0.316,
      "step": 1870
    },
    {
      "epoch": 0.6909158050221565,
      "grad_norm": 0.23150752484798431,
      "learning_rate": 0.000154033747998522,
      "loss": 0.2924,
      "step": 1871
    },
    {
      "epoch": 0.691285081240768,
      "grad_norm": 0.2737838327884674,
      "learning_rate": 0.00015400911442295848,
      "loss": 0.2814,
      "step": 1872
    },
    {
      "epoch": 0.6916543574593796,
      "grad_norm": 0.2643684446811676,
      "learning_rate": 0.00015398448084739502,
      "loss": 0.303,
      "step": 1873
    },
    {
      "epoch": 0.6920236336779911,
      "grad_norm": 0.42378824949264526,
      "learning_rate": 0.0001539598472718315,
      "loss": 0.3167,
      "step": 1874
    },
    {
      "epoch": 0.6923929098966026,
      "grad_norm": 0.2837834656238556,
      "learning_rate": 0.00015393521369626802,
      "loss": 0.2948,
      "step": 1875
    },
    {
      "epoch": 0.6927621861152142,
      "grad_norm": 0.24619752168655396,
      "learning_rate": 0.0001539105801207045,
      "loss": 0.3024,
      "step": 1876
    },
    {
      "epoch": 0.6931314623338257,
      "grad_norm": 0.24698737263679504,
      "learning_rate": 0.00015388594654514102,
      "loss": 0.2985,
      "step": 1877
    },
    {
      "epoch": 0.6935007385524372,
      "grad_norm": 0.3062925636768341,
      "learning_rate": 0.00015386131296957754,
      "loss": 0.3311,
      "step": 1878
    },
    {
      "epoch": 0.6938700147710487,
      "grad_norm": 0.2970742881298065,
      "learning_rate": 0.00015383667939401405,
      "loss": 0.3188,
      "step": 1879
    },
    {
      "epoch": 0.6942392909896603,
      "grad_norm": 0.2796284854412079,
      "learning_rate": 0.00015381204581845054,
      "loss": 0.3468,
      "step": 1880
    },
    {
      "epoch": 0.6946085672082718,
      "grad_norm": 0.278063029050827,
      "learning_rate": 0.00015378741224288705,
      "loss": 0.3119,
      "step": 1881
    },
    {
      "epoch": 0.6949778434268833,
      "grad_norm": 0.32742705941200256,
      "learning_rate": 0.00015376277866732357,
      "loss": 0.2612,
      "step": 1882
    },
    {
      "epoch": 0.6953471196454948,
      "grad_norm": 0.29229697585105896,
      "learning_rate": 0.00015373814509176008,
      "loss": 0.2931,
      "step": 1883
    },
    {
      "epoch": 0.6957163958641064,
      "grad_norm": 0.27059805393218994,
      "learning_rate": 0.00015371351151619657,
      "loss": 0.3153,
      "step": 1884
    },
    {
      "epoch": 0.6960856720827179,
      "grad_norm": 0.3111379146575928,
      "learning_rate": 0.00015368887794063309,
      "loss": 0.3611,
      "step": 1885
    },
    {
      "epoch": 0.6964549483013294,
      "grad_norm": 0.23783890902996063,
      "learning_rate": 0.0001536642443650696,
      "loss": 0.2944,
      "step": 1886
    },
    {
      "epoch": 0.696824224519941,
      "grad_norm": 0.24644704163074493,
      "learning_rate": 0.00015363961078950611,
      "loss": 0.2545,
      "step": 1887
    },
    {
      "epoch": 0.6971935007385525,
      "grad_norm": 0.28930196166038513,
      "learning_rate": 0.0001536149772139426,
      "loss": 0.324,
      "step": 1888
    },
    {
      "epoch": 0.697562776957164,
      "grad_norm": 0.4145079553127289,
      "learning_rate": 0.00015359034363837912,
      "loss": 0.4474,
      "step": 1889
    },
    {
      "epoch": 0.6979320531757754,
      "grad_norm": 0.3037989139556885,
      "learning_rate": 0.0001535657100628156,
      "loss": 0.3236,
      "step": 1890
    },
    {
      "epoch": 0.698301329394387,
      "grad_norm": 0.26441720128059387,
      "learning_rate": 0.00015354107648725215,
      "loss": 0.26,
      "step": 1891
    },
    {
      "epoch": 0.6986706056129985,
      "grad_norm": 0.23267091810703278,
      "learning_rate": 0.00015351644291168863,
      "loss": 0.2776,
      "step": 1892
    },
    {
      "epoch": 0.69903988183161,
      "grad_norm": 0.489742636680603,
      "learning_rate": 0.00015349180933612515,
      "loss": 0.3788,
      "step": 1893
    },
    {
      "epoch": 0.6994091580502215,
      "grad_norm": 0.32214441895484924,
      "learning_rate": 0.00015346717576056164,
      "loss": 0.3675,
      "step": 1894
    },
    {
      "epoch": 0.6997784342688331,
      "grad_norm": 0.29301875829696655,
      "learning_rate": 0.00015344254218499815,
      "loss": 0.3549,
      "step": 1895
    },
    {
      "epoch": 0.7001477104874446,
      "grad_norm": 0.2997570037841797,
      "learning_rate": 0.00015341790860943466,
      "loss": 0.2991,
      "step": 1896
    },
    {
      "epoch": 0.7005169867060561,
      "grad_norm": 0.30343782901763916,
      "learning_rate": 0.00015339327503387118,
      "loss": 0.3299,
      "step": 1897
    },
    {
      "epoch": 0.7008862629246676,
      "grad_norm": 0.3107844889163971,
      "learning_rate": 0.00015336864145830767,
      "loss": 0.3547,
      "step": 1898
    },
    {
      "epoch": 0.7012555391432792,
      "grad_norm": 0.26904937624931335,
      "learning_rate": 0.00015334400788274418,
      "loss": 0.3074,
      "step": 1899
    },
    {
      "epoch": 0.7016248153618907,
      "grad_norm": 0.3177255690097809,
      "learning_rate": 0.0001533193743071807,
      "loss": 0.3576,
      "step": 1900
    },
    {
      "epoch": 0.7016248153618907,
      "eval_loss": 0.29446089267730713,
      "eval_runtime": 5.8552,
      "eval_samples_per_second": 8.539,
      "eval_steps_per_second": 1.196,
      "step": 1900
    },
    {
      "epoch": 0.7019940915805022,
      "grad_norm": 0.20602965354919434,
      "learning_rate": 0.0001532947407316172,
      "loss": 0.2409,
      "step": 1901
    },
    {
      "epoch": 0.7023633677991138,
      "grad_norm": 0.290698379278183,
      "learning_rate": 0.0001532701071560537,
      "loss": 0.3272,
      "step": 1902
    },
    {
      "epoch": 0.7027326440177253,
      "grad_norm": 0.2832525670528412,
      "learning_rate": 0.0001532454735804902,
      "loss": 0.2954,
      "step": 1903
    },
    {
      "epoch": 0.7031019202363368,
      "grad_norm": 0.2994844913482666,
      "learning_rate": 0.0001532208400049267,
      "loss": 0.3127,
      "step": 1904
    },
    {
      "epoch": 0.7034711964549483,
      "grad_norm": 0.27429550886154175,
      "learning_rate": 0.00015319620642936324,
      "loss": 0.3643,
      "step": 1905
    },
    {
      "epoch": 0.7038404726735599,
      "grad_norm": 0.2514464855194092,
      "learning_rate": 0.00015317157285379973,
      "loss": 0.2824,
      "step": 1906
    },
    {
      "epoch": 0.7042097488921714,
      "grad_norm": 0.2964741289615631,
      "learning_rate": 0.00015314693927823624,
      "loss": 0.3267,
      "step": 1907
    },
    {
      "epoch": 0.7045790251107829,
      "grad_norm": 0.2951356768608093,
      "learning_rate": 0.00015312230570267273,
      "loss": 0.3232,
      "step": 1908
    },
    {
      "epoch": 0.7049483013293943,
      "grad_norm": 0.2574373781681061,
      "learning_rate": 0.00015309767212710925,
      "loss": 0.3239,
      "step": 1909
    },
    {
      "epoch": 0.705317577548006,
      "grad_norm": 0.24858419597148895,
      "learning_rate": 0.00015307303855154576,
      "loss": 0.2769,
      "step": 1910
    },
    {
      "epoch": 0.7056868537666174,
      "grad_norm": 0.31815093755722046,
      "learning_rate": 0.00015304840497598228,
      "loss": 0.3433,
      "step": 1911
    },
    {
      "epoch": 0.7060561299852289,
      "grad_norm": 0.32545092701911926,
      "learning_rate": 0.00015302377140041876,
      "loss": 0.2926,
      "step": 1912
    },
    {
      "epoch": 0.7064254062038404,
      "grad_norm": 0.25615841150283813,
      "learning_rate": 0.00015299913782485528,
      "loss": 0.3072,
      "step": 1913
    },
    {
      "epoch": 0.706794682422452,
      "grad_norm": 0.2847903072834015,
      "learning_rate": 0.0001529745042492918,
      "loss": 0.3794,
      "step": 1914
    },
    {
      "epoch": 0.7071639586410635,
      "grad_norm": 0.2633518576622009,
      "learning_rate": 0.0001529498706737283,
      "loss": 0.2807,
      "step": 1915
    },
    {
      "epoch": 0.707533234859675,
      "grad_norm": 0.28141912817955017,
      "learning_rate": 0.0001529252370981648,
      "loss": 0.3122,
      "step": 1916
    },
    {
      "epoch": 0.7079025110782866,
      "grad_norm": 0.20738820731639862,
      "learning_rate": 0.0001529006035226013,
      "loss": 0.1951,
      "step": 1917
    },
    {
      "epoch": 0.7082717872968981,
      "grad_norm": 0.29588785767555237,
      "learning_rate": 0.00015287596994703782,
      "loss": 0.3424,
      "step": 1918
    },
    {
      "epoch": 0.7086410635155096,
      "grad_norm": 0.27013230323791504,
      "learning_rate": 0.00015285133637147434,
      "loss": 0.3418,
      "step": 1919
    },
    {
      "epoch": 0.7090103397341211,
      "grad_norm": 0.2913917005062103,
      "learning_rate": 0.00015282670279591083,
      "loss": 0.3648,
      "step": 1920
    },
    {
      "epoch": 0.7093796159527327,
      "grad_norm": 0.28088995814323425,
      "learning_rate": 0.00015280206922034734,
      "loss": 0.2865,
      "step": 1921
    },
    {
      "epoch": 0.7097488921713442,
      "grad_norm": 0.3325832486152649,
      "learning_rate": 0.00015277743564478383,
      "loss": 0.403,
      "step": 1922
    },
    {
      "epoch": 0.7101181683899557,
      "grad_norm": 0.2513841986656189,
      "learning_rate": 0.00015275280206922037,
      "loss": 0.3073,
      "step": 1923
    },
    {
      "epoch": 0.7104874446085672,
      "grad_norm": 0.2998408079147339,
      "learning_rate": 0.00015272816849365686,
      "loss": 0.3883,
      "step": 1924
    },
    {
      "epoch": 0.7108567208271788,
      "grad_norm": 0.2626917362213135,
      "learning_rate": 0.00015270353491809337,
      "loss": 0.3203,
      "step": 1925
    },
    {
      "epoch": 0.7112259970457903,
      "grad_norm": 0.222730815410614,
      "learning_rate": 0.00015267890134252986,
      "loss": 0.239,
      "step": 1926
    },
    {
      "epoch": 0.7115952732644018,
      "grad_norm": 0.31508567929267883,
      "learning_rate": 0.00015265426776696637,
      "loss": 0.3203,
      "step": 1927
    },
    {
      "epoch": 0.7119645494830132,
      "grad_norm": 0.23665258288383484,
      "learning_rate": 0.0001526296341914029,
      "loss": 0.3118,
      "step": 1928
    },
    {
      "epoch": 0.7123338257016248,
      "grad_norm": 0.2527763545513153,
      "learning_rate": 0.0001526050006158394,
      "loss": 0.2807,
      "step": 1929
    },
    {
      "epoch": 0.7127031019202363,
      "grad_norm": 0.234427347779274,
      "learning_rate": 0.0001525803670402759,
      "loss": 0.2415,
      "step": 1930
    },
    {
      "epoch": 0.7130723781388478,
      "grad_norm": 0.23413021862506866,
      "learning_rate": 0.0001525557334647124,
      "loss": 0.2821,
      "step": 1931
    },
    {
      "epoch": 0.7134416543574594,
      "grad_norm": 0.2665095329284668,
      "learning_rate": 0.00015253109988914892,
      "loss": 0.3385,
      "step": 1932
    },
    {
      "epoch": 0.7138109305760709,
      "grad_norm": 0.2771255373954773,
      "learning_rate": 0.00015250646631358544,
      "loss": 0.3215,
      "step": 1933
    },
    {
      "epoch": 0.7141802067946824,
      "grad_norm": 0.26301082968711853,
      "learning_rate": 0.00015248183273802192,
      "loss": 0.2928,
      "step": 1934
    },
    {
      "epoch": 0.7145494830132939,
      "grad_norm": 0.3336317539215088,
      "learning_rate": 0.00015245719916245844,
      "loss": 0.3703,
      "step": 1935
    },
    {
      "epoch": 0.7149187592319055,
      "grad_norm": 0.3198574483394623,
      "learning_rate": 0.00015243256558689493,
      "loss": 0.2901,
      "step": 1936
    },
    {
      "epoch": 0.715288035450517,
      "grad_norm": 0.2950039803981781,
      "learning_rate": 0.00015240793201133147,
      "loss": 0.3221,
      "step": 1937
    },
    {
      "epoch": 0.7156573116691285,
      "grad_norm": 0.25150543451309204,
      "learning_rate": 0.00015238329843576795,
      "loss": 0.3088,
      "step": 1938
    },
    {
      "epoch": 0.71602658788774,
      "grad_norm": 0.25831344723701477,
      "learning_rate": 0.00015235866486020447,
      "loss": 0.3233,
      "step": 1939
    },
    {
      "epoch": 0.7163958641063516,
      "grad_norm": 0.2541324496269226,
      "learning_rate": 0.00015233403128464096,
      "loss": 0.2441,
      "step": 1940
    },
    {
      "epoch": 0.7167651403249631,
      "grad_norm": 0.4397067725658417,
      "learning_rate": 0.00015230939770907747,
      "loss": 0.315,
      "step": 1941
    },
    {
      "epoch": 0.7171344165435746,
      "grad_norm": 0.3101305067539215,
      "learning_rate": 0.00015228476413351399,
      "loss": 0.3353,
      "step": 1942
    },
    {
      "epoch": 0.7175036927621861,
      "grad_norm": 0.23744426667690277,
      "learning_rate": 0.0001522601305579505,
      "loss": 0.2855,
      "step": 1943
    },
    {
      "epoch": 0.7178729689807977,
      "grad_norm": 0.267407089471817,
      "learning_rate": 0.000152235496982387,
      "loss": 0.2775,
      "step": 1944
    },
    {
      "epoch": 0.7182422451994092,
      "grad_norm": 0.2828403115272522,
      "learning_rate": 0.0001522108634068235,
      "loss": 0.2896,
      "step": 1945
    },
    {
      "epoch": 0.7186115214180206,
      "grad_norm": 0.25437045097351074,
      "learning_rate": 0.00015218622983126002,
      "loss": 0.2654,
      "step": 1946
    },
    {
      "epoch": 0.7189807976366323,
      "grad_norm": 0.22928428649902344,
      "learning_rate": 0.00015216159625569653,
      "loss": 0.2482,
      "step": 1947
    },
    {
      "epoch": 0.7193500738552437,
      "grad_norm": 0.24921920895576477,
      "learning_rate": 0.00015213696268013302,
      "loss": 0.2982,
      "step": 1948
    },
    {
      "epoch": 0.7197193500738552,
      "grad_norm": 0.27868038415908813,
      "learning_rate": 0.00015211232910456953,
      "loss": 0.258,
      "step": 1949
    },
    {
      "epoch": 0.7200886262924667,
      "grad_norm": 0.372651606798172,
      "learning_rate": 0.00015208769552900605,
      "loss": 0.3602,
      "step": 1950
    },
    {
      "epoch": 0.7200886262924667,
      "eval_loss": 0.29376015067100525,
      "eval_runtime": 5.8538,
      "eval_samples_per_second": 8.541,
      "eval_steps_per_second": 1.196,
      "step": 1950
    },
    {
      "epoch": 0.7204579025110783,
      "grad_norm": 0.27268344163894653,
      "learning_rate": 0.00015206306195344256,
      "loss": 0.3007,
      "step": 1951
    },
    {
      "epoch": 0.7208271787296898,
      "grad_norm": 0.33668243885040283,
      "learning_rate": 0.00015203842837787905,
      "loss": 0.3828,
      "step": 1952
    },
    {
      "epoch": 0.7211964549483013,
      "grad_norm": 0.2558956444263458,
      "learning_rate": 0.00015201379480231557,
      "loss": 0.2787,
      "step": 1953
    },
    {
      "epoch": 0.7215657311669128,
      "grad_norm": 0.25845855474472046,
      "learning_rate": 0.00015198916122675205,
      "loss": 0.3092,
      "step": 1954
    },
    {
      "epoch": 0.7219350073855244,
      "grad_norm": 0.2578001320362091,
      "learning_rate": 0.0001519645276511886,
      "loss": 0.302,
      "step": 1955
    },
    {
      "epoch": 0.7223042836041359,
      "grad_norm": 0.30225417017936707,
      "learning_rate": 0.00015193989407562508,
      "loss": 0.3521,
      "step": 1956
    },
    {
      "epoch": 0.7226735598227474,
      "grad_norm": 0.23431427776813507,
      "learning_rate": 0.0001519152605000616,
      "loss": 0.2989,
      "step": 1957
    },
    {
      "epoch": 0.7230428360413589,
      "grad_norm": 0.269161581993103,
      "learning_rate": 0.00015189062692449808,
      "loss": 0.2951,
      "step": 1958
    },
    {
      "epoch": 0.7234121122599705,
      "grad_norm": 0.26195868849754333,
      "learning_rate": 0.0001518659933489346,
      "loss": 0.2539,
      "step": 1959
    },
    {
      "epoch": 0.723781388478582,
      "grad_norm": 0.2628759443759918,
      "learning_rate": 0.00015184135977337111,
      "loss": 0.3122,
      "step": 1960
    },
    {
      "epoch": 0.7241506646971935,
      "grad_norm": 0.22648142278194427,
      "learning_rate": 0.00015181672619780763,
      "loss": 0.2497,
      "step": 1961
    },
    {
      "epoch": 0.7245199409158051,
      "grad_norm": 0.29775509238243103,
      "learning_rate": 0.00015179209262224412,
      "loss": 0.3033,
      "step": 1962
    },
    {
      "epoch": 0.7248892171344166,
      "grad_norm": 0.24567222595214844,
      "learning_rate": 0.00015176745904668063,
      "loss": 0.3178,
      "step": 1963
    },
    {
      "epoch": 0.725258493353028,
      "grad_norm": 0.2651590406894684,
      "learning_rate": 0.00015174282547111715,
      "loss": 0.3034,
      "step": 1964
    },
    {
      "epoch": 0.7256277695716395,
      "grad_norm": 0.24350565671920776,
      "learning_rate": 0.00015171819189555366,
      "loss": 0.2778,
      "step": 1965
    },
    {
      "epoch": 0.7259970457902511,
      "grad_norm": 0.2624291777610779,
      "learning_rate": 0.00015169355831999015,
      "loss": 0.2585,
      "step": 1966
    },
    {
      "epoch": 0.7263663220088626,
      "grad_norm": 0.28388434648513794,
      "learning_rate": 0.00015166892474442666,
      "loss": 0.2926,
      "step": 1967
    },
    {
      "epoch": 0.7267355982274741,
      "grad_norm": 0.29764947295188904,
      "learning_rate": 0.00015164429116886315,
      "loss": 0.3645,
      "step": 1968
    },
    {
      "epoch": 0.7271048744460856,
      "grad_norm": 0.25831013917922974,
      "learning_rate": 0.0001516196575932997,
      "loss": 0.3071,
      "step": 1969
    },
    {
      "epoch": 0.7274741506646972,
      "grad_norm": 0.3143414258956909,
      "learning_rate": 0.00015159502401773618,
      "loss": 0.3491,
      "step": 1970
    },
    {
      "epoch": 0.7278434268833087,
      "grad_norm": 0.2558245062828064,
      "learning_rate": 0.0001515703904421727,
      "loss": 0.2666,
      "step": 1971
    },
    {
      "epoch": 0.7282127031019202,
      "grad_norm": 0.2600441873073578,
      "learning_rate": 0.00015154575686660918,
      "loss": 0.289,
      "step": 1972
    },
    {
      "epoch": 0.7285819793205317,
      "grad_norm": 0.32279205322265625,
      "learning_rate": 0.0001515211232910457,
      "loss": 0.3442,
      "step": 1973
    },
    {
      "epoch": 0.7289512555391433,
      "grad_norm": 0.2380223423242569,
      "learning_rate": 0.0001514964897154822,
      "loss": 0.2647,
      "step": 1974
    },
    {
      "epoch": 0.7293205317577548,
      "grad_norm": 0.2663707435131073,
      "learning_rate": 0.00015147185613991873,
      "loss": 0.3083,
      "step": 1975
    },
    {
      "epoch": 0.7296898079763663,
      "grad_norm": 0.248251274228096,
      "learning_rate": 0.0001514472225643552,
      "loss": 0.2856,
      "step": 1976
    },
    {
      "epoch": 0.7300590841949779,
      "grad_norm": 0.28036782145500183,
      "learning_rate": 0.00015142258898879173,
      "loss": 0.3426,
      "step": 1977
    },
    {
      "epoch": 0.7304283604135894,
      "grad_norm": 0.30917128920555115,
      "learning_rate": 0.00015139795541322824,
      "loss": 0.3438,
      "step": 1978
    },
    {
      "epoch": 0.7307976366322009,
      "grad_norm": 0.2681553363800049,
      "learning_rate": 0.00015137332183766476,
      "loss": 0.3099,
      "step": 1979
    },
    {
      "epoch": 0.7311669128508124,
      "grad_norm": 0.2959456443786621,
      "learning_rate": 0.00015134868826210124,
      "loss": 0.2986,
      "step": 1980
    },
    {
      "epoch": 0.731536189069424,
      "grad_norm": 0.2918236255645752,
      "learning_rate": 0.00015132405468653776,
      "loss": 0.3043,
      "step": 1981
    },
    {
      "epoch": 0.7319054652880355,
      "grad_norm": 0.2856104075908661,
      "learning_rate": 0.00015129942111097427,
      "loss": 0.3195,
      "step": 1982
    },
    {
      "epoch": 0.732274741506647,
      "grad_norm": 0.33086833357810974,
      "learning_rate": 0.0001512747875354108,
      "loss": 0.4175,
      "step": 1983
    },
    {
      "epoch": 0.7326440177252584,
      "grad_norm": 0.2300816923379898,
      "learning_rate": 0.00015125015395984728,
      "loss": 0.2974,
      "step": 1984
    },
    {
      "epoch": 0.73301329394387,
      "grad_norm": 0.3335312604904175,
      "learning_rate": 0.0001512255203842838,
      "loss": 0.2704,
      "step": 1985
    },
    {
      "epoch": 0.7333825701624815,
      "grad_norm": 0.26959389448165894,
      "learning_rate": 0.00015120088680872028,
      "loss": 0.3085,
      "step": 1986
    },
    {
      "epoch": 0.733751846381093,
      "grad_norm": 0.29761821031570435,
      "learning_rate": 0.00015117625323315682,
      "loss": 0.3592,
      "step": 1987
    },
    {
      "epoch": 0.7341211225997046,
      "grad_norm": 0.2845323085784912,
      "learning_rate": 0.0001511516196575933,
      "loss": 0.3228,
      "step": 1988
    },
    {
      "epoch": 0.7344903988183161,
      "grad_norm": 0.29670780897140503,
      "learning_rate": 0.00015112698608202982,
      "loss": 0.3098,
      "step": 1989
    },
    {
      "epoch": 0.7348596750369276,
      "grad_norm": 0.2633662819862366,
      "learning_rate": 0.0001511023525064663,
      "loss": 0.2927,
      "step": 1990
    },
    {
      "epoch": 0.7352289512555391,
      "grad_norm": 0.2808889150619507,
      "learning_rate": 0.00015107771893090282,
      "loss": 0.2659,
      "step": 1991
    },
    {
      "epoch": 0.7355982274741507,
      "grad_norm": 0.2930167019367218,
      "learning_rate": 0.00015105308535533934,
      "loss": 0.3096,
      "step": 1992
    },
    {
      "epoch": 0.7359675036927622,
      "grad_norm": 0.2737327218055725,
      "learning_rate": 0.00015102845177977585,
      "loss": 0.3401,
      "step": 1993
    },
    {
      "epoch": 0.7363367799113737,
      "grad_norm": 0.30987557768821716,
      "learning_rate": 0.00015100381820421234,
      "loss": 0.3673,
      "step": 1994
    },
    {
      "epoch": 0.7367060561299852,
      "grad_norm": 0.2720504105091095,
      "learning_rate": 0.00015097918462864886,
      "loss": 0.2611,
      "step": 1995
    },
    {
      "epoch": 0.7370753323485968,
      "grad_norm": 0.26491352915763855,
      "learning_rate": 0.00015095455105308537,
      "loss": 0.2601,
      "step": 1996
    },
    {
      "epoch": 0.7374446085672083,
      "grad_norm": 0.2890115976333618,
      "learning_rate": 0.00015092991747752188,
      "loss": 0.2759,
      "step": 1997
    },
    {
      "epoch": 0.7378138847858198,
      "grad_norm": 0.26483097672462463,
      "learning_rate": 0.00015090528390195837,
      "loss": 0.2952,
      "step": 1998
    },
    {
      "epoch": 0.7381831610044313,
      "grad_norm": 0.4067525565624237,
      "learning_rate": 0.0001508806503263949,
      "loss": 0.3835,
      "step": 1999
    },
    {
      "epoch": 0.7385524372230429,
      "grad_norm": 0.25175902247428894,
      "learning_rate": 0.00015085601675083137,
      "loss": 0.3223,
      "step": 2000
    },
    {
      "epoch": 0.7385524372230429,
      "eval_loss": 0.29646578431129456,
      "eval_runtime": 5.8534,
      "eval_samples_per_second": 8.542,
      "eval_steps_per_second": 1.196,
      "step": 2000
    },
    {
      "epoch": 0.7389217134416544,
      "grad_norm": 0.23213210701942444,
      "learning_rate": 0.00015083138317526792,
      "loss": 0.2536,
      "step": 2001
    },
    {
      "epoch": 0.7392909896602659,
      "grad_norm": 0.3151390552520752,
      "learning_rate": 0.0001508067495997044,
      "loss": 0.3286,
      "step": 2002
    },
    {
      "epoch": 0.7396602658788775,
      "grad_norm": 0.30138006806373596,
      "learning_rate": 0.00015078211602414092,
      "loss": 0.345,
      "step": 2003
    },
    {
      "epoch": 0.740029542097489,
      "grad_norm": 0.3060076832771301,
      "learning_rate": 0.0001507574824485774,
      "loss": 0.3118,
      "step": 2004
    },
    {
      "epoch": 0.7403988183161004,
      "grad_norm": 0.26108142733573914,
      "learning_rate": 0.00015073284887301392,
      "loss": 0.281,
      "step": 2005
    },
    {
      "epoch": 0.7407680945347119,
      "grad_norm": 0.23906740546226501,
      "learning_rate": 0.00015070821529745043,
      "loss": 0.3418,
      "step": 2006
    },
    {
      "epoch": 0.7411373707533235,
      "grad_norm": 0.26794371008872986,
      "learning_rate": 0.00015068358172188695,
      "loss": 0.2954,
      "step": 2007
    },
    {
      "epoch": 0.741506646971935,
      "grad_norm": 0.2571132481098175,
      "learning_rate": 0.00015065894814632344,
      "loss": 0.2705,
      "step": 2008
    },
    {
      "epoch": 0.7418759231905465,
      "grad_norm": 0.38293299078941345,
      "learning_rate": 0.00015063431457075995,
      "loss": 0.3252,
      "step": 2009
    },
    {
      "epoch": 0.742245199409158,
      "grad_norm": 0.22673162817955017,
      "learning_rate": 0.00015060968099519647,
      "loss": 0.2243,
      "step": 2010
    },
    {
      "epoch": 0.7426144756277696,
      "grad_norm": 0.286089152097702,
      "learning_rate": 0.00015058504741963298,
      "loss": 0.2674,
      "step": 2011
    },
    {
      "epoch": 0.7429837518463811,
      "grad_norm": 0.25819486379623413,
      "learning_rate": 0.00015056041384406947,
      "loss": 0.3055,
      "step": 2012
    },
    {
      "epoch": 0.7433530280649926,
      "grad_norm": 0.24408473074436188,
      "learning_rate": 0.00015053578026850598,
      "loss": 0.2664,
      "step": 2013
    },
    {
      "epoch": 0.7437223042836041,
      "grad_norm": 0.2625264823436737,
      "learning_rate": 0.00015051114669294247,
      "loss": 0.3595,
      "step": 2014
    },
    {
      "epoch": 0.7440915805022157,
      "grad_norm": 0.2610447108745575,
      "learning_rate": 0.000150486513117379,
      "loss": 0.3264,
      "step": 2015
    },
    {
      "epoch": 0.7444608567208272,
      "grad_norm": 0.2926216721534729,
      "learning_rate": 0.0001504618795418155,
      "loss": 0.3684,
      "step": 2016
    },
    {
      "epoch": 0.7448301329394387,
      "grad_norm": 0.2876848876476288,
      "learning_rate": 0.00015043724596625201,
      "loss": 0.2529,
      "step": 2017
    },
    {
      "epoch": 0.7451994091580503,
      "grad_norm": 0.31420740485191345,
      "learning_rate": 0.0001504126123906885,
      "loss": 0.3609,
      "step": 2018
    },
    {
      "epoch": 0.7455686853766618,
      "grad_norm": 0.25228697061538696,
      "learning_rate": 0.00015038797881512502,
      "loss": 0.2823,
      "step": 2019
    },
    {
      "epoch": 0.7459379615952733,
      "grad_norm": 0.3094099462032318,
      "learning_rate": 0.00015036334523956153,
      "loss": 0.3351,
      "step": 2020
    },
    {
      "epoch": 0.7463072378138847,
      "grad_norm": 0.287903755903244,
      "learning_rate": 0.00015033871166399805,
      "loss": 0.3111,
      "step": 2021
    },
    {
      "epoch": 0.7466765140324964,
      "grad_norm": 0.3031036853790283,
      "learning_rate": 0.00015031407808843453,
      "loss": 0.3174,
      "step": 2022
    },
    {
      "epoch": 0.7470457902511078,
      "grad_norm": 0.21436475217342377,
      "learning_rate": 0.00015028944451287105,
      "loss": 0.2926,
      "step": 2023
    },
    {
      "epoch": 0.7474150664697193,
      "grad_norm": 0.3065086305141449,
      "learning_rate": 0.00015026481093730756,
      "loss": 0.3138,
      "step": 2024
    },
    {
      "epoch": 0.7477843426883308,
      "grad_norm": 0.25306811928749084,
      "learning_rate": 0.00015024017736174408,
      "loss": 0.2447,
      "step": 2025
    },
    {
      "epoch": 0.7481536189069424,
      "grad_norm": 0.2480946034193039,
      "learning_rate": 0.00015021554378618057,
      "loss": 0.2867,
      "step": 2026
    },
    {
      "epoch": 0.7485228951255539,
      "grad_norm": 0.3200647830963135,
      "learning_rate": 0.00015019091021061708,
      "loss": 0.3645,
      "step": 2027
    },
    {
      "epoch": 0.7488921713441654,
      "grad_norm": 0.25502264499664307,
      "learning_rate": 0.0001501662766350536,
      "loss": 0.2771,
      "step": 2028
    },
    {
      "epoch": 0.7492614475627769,
      "grad_norm": 0.2702923119068146,
      "learning_rate": 0.0001501416430594901,
      "loss": 0.287,
      "step": 2029
    },
    {
      "epoch": 0.7496307237813885,
      "grad_norm": 0.30667737126350403,
      "learning_rate": 0.0001501170094839266,
      "loss": 0.3356,
      "step": 2030
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.26852285861968994,
      "learning_rate": 0.0001500923759083631,
      "loss": 0.3264,
      "step": 2031
    },
    {
      "epoch": 0.7503692762186115,
      "grad_norm": 0.2851296663284302,
      "learning_rate": 0.0001500677423327996,
      "loss": 0.359,
      "step": 2032
    },
    {
      "epoch": 0.7507385524372231,
      "grad_norm": 0.2569027841091156,
      "learning_rate": 0.00015004310875723614,
      "loss": 0.2933,
      "step": 2033
    },
    {
      "epoch": 0.7511078286558346,
      "grad_norm": 0.2416686713695526,
      "learning_rate": 0.00015001847518167263,
      "loss": 0.2826,
      "step": 2034
    },
    {
      "epoch": 0.7514771048744461,
      "grad_norm": 0.255740225315094,
      "learning_rate": 0.00014999384160610914,
      "loss": 0.3414,
      "step": 2035
    },
    {
      "epoch": 0.7518463810930576,
      "grad_norm": 0.28471001982688904,
      "learning_rate": 0.00014996920803054563,
      "loss": 0.3215,
      "step": 2036
    },
    {
      "epoch": 0.7522156573116692,
      "grad_norm": 0.21029578149318695,
      "learning_rate": 0.00014994457445498214,
      "loss": 0.1987,
      "step": 2037
    },
    {
      "epoch": 0.7525849335302807,
      "grad_norm": 0.23480457067489624,
      "learning_rate": 0.00014991994087941866,
      "loss": 0.2928,
      "step": 2038
    },
    {
      "epoch": 0.7529542097488922,
      "grad_norm": 0.3157004117965698,
      "learning_rate": 0.00014989530730385517,
      "loss": 0.3403,
      "step": 2039
    },
    {
      "epoch": 0.7533234859675036,
      "grad_norm": 0.23648284375667572,
      "learning_rate": 0.00014987067372829166,
      "loss": 0.288,
      "step": 2040
    },
    {
      "epoch": 0.7536927621861153,
      "grad_norm": 0.3547457754611969,
      "learning_rate": 0.00014984604015272818,
      "loss": 0.4128,
      "step": 2041
    },
    {
      "epoch": 0.7540620384047267,
      "grad_norm": 0.32118773460388184,
      "learning_rate": 0.0001498214065771647,
      "loss": 0.3829,
      "step": 2042
    },
    {
      "epoch": 0.7544313146233382,
      "grad_norm": 0.2910565137863159,
      "learning_rate": 0.0001497967730016012,
      "loss": 0.3197,
      "step": 2043
    },
    {
      "epoch": 0.7548005908419497,
      "grad_norm": 0.24634191393852234,
      "learning_rate": 0.0001497721394260377,
      "loss": 0.2618,
      "step": 2044
    },
    {
      "epoch": 0.7551698670605613,
      "grad_norm": 0.3879796862602234,
      "learning_rate": 0.0001497475058504742,
      "loss": 0.3678,
      "step": 2045
    },
    {
      "epoch": 0.7555391432791728,
      "grad_norm": 0.34548044204711914,
      "learning_rate": 0.0001497228722749107,
      "loss": 0.4285,
      "step": 2046
    },
    {
      "epoch": 0.7559084194977843,
      "grad_norm": 0.298462450504303,
      "learning_rate": 0.00014969823869934724,
      "loss": 0.36,
      "step": 2047
    },
    {
      "epoch": 0.7562776957163959,
      "grad_norm": 0.2912174165248871,
      "learning_rate": 0.00014967360512378372,
      "loss": 0.3545,
      "step": 2048
    },
    {
      "epoch": 0.7566469719350074,
      "grad_norm": 0.3759162724018097,
      "learning_rate": 0.00014964897154822024,
      "loss": 0.3304,
      "step": 2049
    },
    {
      "epoch": 0.7570162481536189,
      "grad_norm": 0.28800928592681885,
      "learning_rate": 0.00014962433797265673,
      "loss": 0.345,
      "step": 2050
    },
    {
      "epoch": 0.7570162481536189,
      "eval_loss": 0.2972644567489624,
      "eval_runtime": 5.8587,
      "eval_samples_per_second": 8.534,
      "eval_steps_per_second": 1.195,
      "step": 2050
    },
    {
      "epoch": 0.7573855243722304,
      "grad_norm": 0.23943784832954407,
      "learning_rate": 0.00014959970439709324,
      "loss": 0.2881,
      "step": 2051
    },
    {
      "epoch": 0.757754800590842,
      "grad_norm": 0.30913013219833374,
      "learning_rate": 0.00014957507082152976,
      "loss": 0.3886,
      "step": 2052
    },
    {
      "epoch": 0.7581240768094535,
      "grad_norm": 0.3367602527141571,
      "learning_rate": 0.00014955043724596627,
      "loss": 0.4031,
      "step": 2053
    },
    {
      "epoch": 0.758493353028065,
      "grad_norm": 0.2651554048061371,
      "learning_rate": 0.00014952580367040276,
      "loss": 0.263,
      "step": 2054
    },
    {
      "epoch": 0.7588626292466765,
      "grad_norm": 0.3087141215801239,
      "learning_rate": 0.00014950117009483927,
      "loss": 0.3262,
      "step": 2055
    },
    {
      "epoch": 0.7592319054652881,
      "grad_norm": 0.30410876870155334,
      "learning_rate": 0.0001494765365192758,
      "loss": 0.324,
      "step": 2056
    },
    {
      "epoch": 0.7596011816838996,
      "grad_norm": 0.31383657455444336,
      "learning_rate": 0.0001494519029437123,
      "loss": 0.3655,
      "step": 2057
    },
    {
      "epoch": 0.759970457902511,
      "grad_norm": 0.23047925531864166,
      "learning_rate": 0.0001494272693681488,
      "loss": 0.2653,
      "step": 2058
    },
    {
      "epoch": 0.7603397341211225,
      "grad_norm": 0.29686346650123596,
      "learning_rate": 0.0001494026357925853,
      "loss": 0.3427,
      "step": 2059
    },
    {
      "epoch": 0.7607090103397341,
      "grad_norm": 0.23575901985168457,
      "learning_rate": 0.00014937800221702182,
      "loss": 0.2934,
      "step": 2060
    },
    {
      "epoch": 0.7610782865583456,
      "grad_norm": 0.29045569896698,
      "learning_rate": 0.00014935336864145833,
      "loss": 0.3784,
      "step": 2061
    },
    {
      "epoch": 0.7614475627769571,
      "grad_norm": 0.22907930612564087,
      "learning_rate": 0.00014932873506589482,
      "loss": 0.2935,
      "step": 2062
    },
    {
      "epoch": 0.7618168389955687,
      "grad_norm": 0.28066712617874146,
      "learning_rate": 0.00014930410149033134,
      "loss": 0.3231,
      "step": 2063
    },
    {
      "epoch": 0.7621861152141802,
      "grad_norm": 0.30530381202697754,
      "learning_rate": 0.00014927946791476782,
      "loss": 0.2737,
      "step": 2064
    },
    {
      "epoch": 0.7625553914327917,
      "grad_norm": 0.286344051361084,
      "learning_rate": 0.00014925483433920436,
      "loss": 0.3369,
      "step": 2065
    },
    {
      "epoch": 0.7629246676514032,
      "grad_norm": 0.20019960403442383,
      "learning_rate": 0.00014923020076364085,
      "loss": 0.2157,
      "step": 2066
    },
    {
      "epoch": 0.7632939438700148,
      "grad_norm": 0.30169007182121277,
      "learning_rate": 0.00014920556718807737,
      "loss": 0.3248,
      "step": 2067
    },
    {
      "epoch": 0.7636632200886263,
      "grad_norm": 0.24721257388591766,
      "learning_rate": 0.00014918093361251385,
      "loss": 0.301,
      "step": 2068
    },
    {
      "epoch": 0.7640324963072378,
      "grad_norm": 0.25720271468162537,
      "learning_rate": 0.00014915630003695037,
      "loss": 0.2868,
      "step": 2069
    },
    {
      "epoch": 0.7644017725258493,
      "grad_norm": 0.26444628834724426,
      "learning_rate": 0.00014913166646138688,
      "loss": 0.3082,
      "step": 2070
    },
    {
      "epoch": 0.7647710487444609,
      "grad_norm": 0.2681595981121063,
      "learning_rate": 0.0001491070328858234,
      "loss": 0.2994,
      "step": 2071
    },
    {
      "epoch": 0.7651403249630724,
      "grad_norm": 0.27749788761138916,
      "learning_rate": 0.00014908239931025989,
      "loss": 0.3161,
      "step": 2072
    },
    {
      "epoch": 0.7655096011816839,
      "grad_norm": 0.28634199500083923,
      "learning_rate": 0.0001490577657346964,
      "loss": 0.3453,
      "step": 2073
    },
    {
      "epoch": 0.7658788774002954,
      "grad_norm": 0.24624323844909668,
      "learning_rate": 0.00014903313215913292,
      "loss": 0.2868,
      "step": 2074
    },
    {
      "epoch": 0.766248153618907,
      "grad_norm": 0.27104800939559937,
      "learning_rate": 0.00014900849858356943,
      "loss": 0.2837,
      "step": 2075
    },
    {
      "epoch": 0.7666174298375185,
      "grad_norm": 0.2961234152317047,
      "learning_rate": 0.00014898386500800592,
      "loss": 0.308,
      "step": 2076
    },
    {
      "epoch": 0.76698670605613,
      "grad_norm": 0.2492390125989914,
      "learning_rate": 0.00014895923143244243,
      "loss": 0.2736,
      "step": 2077
    },
    {
      "epoch": 0.7673559822747416,
      "grad_norm": 0.3363376557826996,
      "learning_rate": 0.00014893459785687892,
      "loss": 0.3132,
      "step": 2078
    },
    {
      "epoch": 0.767725258493353,
      "grad_norm": 0.41437986493110657,
      "learning_rate": 0.00014890996428131546,
      "loss": 0.3341,
      "step": 2079
    },
    {
      "epoch": 0.7680945347119645,
      "grad_norm": 0.3183686435222626,
      "learning_rate": 0.00014888533070575195,
      "loss": 0.376,
      "step": 2080
    },
    {
      "epoch": 0.768463810930576,
      "grad_norm": 0.2770771384239197,
      "learning_rate": 0.00014886069713018846,
      "loss": 0.3033,
      "step": 2081
    },
    {
      "epoch": 0.7688330871491876,
      "grad_norm": 0.3003086447715759,
      "learning_rate": 0.00014883606355462495,
      "loss": 0.3725,
      "step": 2082
    },
    {
      "epoch": 0.7692023633677991,
      "grad_norm": 0.2913537323474884,
      "learning_rate": 0.00014881142997906147,
      "loss": 0.3242,
      "step": 2083
    },
    {
      "epoch": 0.7695716395864106,
      "grad_norm": 0.31694701313972473,
      "learning_rate": 0.00014878679640349798,
      "loss": 0.3658,
      "step": 2084
    },
    {
      "epoch": 0.7699409158050221,
      "grad_norm": 0.2636033594608307,
      "learning_rate": 0.0001487621628279345,
      "loss": 0.2862,
      "step": 2085
    },
    {
      "epoch": 0.7703101920236337,
      "grad_norm": 0.3004938066005707,
      "learning_rate": 0.00014873752925237098,
      "loss": 0.2993,
      "step": 2086
    },
    {
      "epoch": 0.7706794682422452,
      "grad_norm": 0.31371644139289856,
      "learning_rate": 0.0001487128956768075,
      "loss": 0.3697,
      "step": 2087
    },
    {
      "epoch": 0.7710487444608567,
      "grad_norm": 0.30200207233428955,
      "learning_rate": 0.000148688262101244,
      "loss": 0.3745,
      "step": 2088
    },
    {
      "epoch": 0.7714180206794683,
      "grad_norm": 0.29897138476371765,
      "learning_rate": 0.00014866362852568053,
      "loss": 0.3078,
      "step": 2089
    },
    {
      "epoch": 0.7717872968980798,
      "grad_norm": 0.2919757664203644,
      "learning_rate": 0.00014863899495011701,
      "loss": 0.3105,
      "step": 2090
    },
    {
      "epoch": 0.7721565731166913,
      "grad_norm": 0.31833651661872864,
      "learning_rate": 0.00014861436137455353,
      "loss": 0.451,
      "step": 2091
    },
    {
      "epoch": 0.7725258493353028,
      "grad_norm": 0.2824268043041229,
      "learning_rate": 0.00014858972779899004,
      "loss": 0.3029,
      "step": 2092
    },
    {
      "epoch": 0.7728951255539144,
      "grad_norm": 0.37862062454223633,
      "learning_rate": 0.00014856509422342653,
      "loss": 0.3462,
      "step": 2093
    },
    {
      "epoch": 0.7732644017725259,
      "grad_norm": 0.2715948522090912,
      "learning_rate": 0.00014854046064786305,
      "loss": 0.2842,
      "step": 2094
    },
    {
      "epoch": 0.7736336779911374,
      "grad_norm": 0.2682742476463318,
      "learning_rate": 0.00014851582707229953,
      "loss": 0.3047,
      "step": 2095
    },
    {
      "epoch": 0.7740029542097489,
      "grad_norm": 0.28688040375709534,
      "learning_rate": 0.00014849119349673605,
      "loss": 0.3445,
      "step": 2096
    },
    {
      "epoch": 0.7743722304283605,
      "grad_norm": 0.29540103673934937,
      "learning_rate": 0.00014846655992117256,
      "loss": 0.3515,
      "step": 2097
    },
    {
      "epoch": 0.774741506646972,
      "grad_norm": 0.2762974798679352,
      "learning_rate": 0.00014844192634560908,
      "loss": 0.3373,
      "step": 2098
    },
    {
      "epoch": 0.7751107828655834,
      "grad_norm": 0.2746526896953583,
      "learning_rate": 0.00014841729277004556,
      "loss": 0.2879,
      "step": 2099
    },
    {
      "epoch": 0.7754800590841949,
      "grad_norm": 0.2964654862880707,
      "learning_rate": 0.00014839265919448208,
      "loss": 0.3493,
      "step": 2100
    },
    {
      "epoch": 0.7754800590841949,
      "eval_loss": 0.2935633361339569,
      "eval_runtime": 5.8653,
      "eval_samples_per_second": 8.525,
      "eval_steps_per_second": 1.193,
      "step": 2100
    },
    {
      "epoch": 0.7758493353028065,
      "grad_norm": 0.34667137265205383,
      "learning_rate": 0.0001483680256189186,
      "loss": 0.2905,
      "step": 2101
    },
    {
      "epoch": 0.776218611521418,
      "grad_norm": 0.30348148941993713,
      "learning_rate": 0.0001483433920433551,
      "loss": 0.3285,
      "step": 2102
    },
    {
      "epoch": 0.7765878877400295,
      "grad_norm": 0.2987040877342224,
      "learning_rate": 0.0001483187584677916,
      "loss": 0.25,
      "step": 2103
    },
    {
      "epoch": 0.7769571639586411,
      "grad_norm": 0.2796286642551422,
      "learning_rate": 0.0001482941248922281,
      "loss": 0.3412,
      "step": 2104
    },
    {
      "epoch": 0.7773264401772526,
      "grad_norm": 0.3094947040081024,
      "learning_rate": 0.0001482694913166646,
      "loss": 0.2876,
      "step": 2105
    },
    {
      "epoch": 0.7776957163958641,
      "grad_norm": 0.3286437690258026,
      "learning_rate": 0.00014824485774110114,
      "loss": 0.2762,
      "step": 2106
    },
    {
      "epoch": 0.7780649926144756,
      "grad_norm": 0.22929511964321136,
      "learning_rate": 0.00014822022416553763,
      "loss": 0.2928,
      "step": 2107
    },
    {
      "epoch": 0.7784342688330872,
      "grad_norm": 0.2838447093963623,
      "learning_rate": 0.00014819559058997414,
      "loss": 0.3409,
      "step": 2108
    },
    {
      "epoch": 0.7788035450516987,
      "grad_norm": 0.2506687343120575,
      "learning_rate": 0.00014817095701441063,
      "loss": 0.2909,
      "step": 2109
    },
    {
      "epoch": 0.7791728212703102,
      "grad_norm": 0.3038922846317291,
      "learning_rate": 0.00014814632343884714,
      "loss": 0.3414,
      "step": 2110
    },
    {
      "epoch": 0.7795420974889217,
      "grad_norm": 0.3076067566871643,
      "learning_rate": 0.00014812168986328366,
      "loss": 0.2651,
      "step": 2111
    },
    {
      "epoch": 0.7799113737075333,
      "grad_norm": 0.2969833016395569,
      "learning_rate": 0.00014809705628772017,
      "loss": 0.278,
      "step": 2112
    },
    {
      "epoch": 0.7802806499261448,
      "grad_norm": 0.44124898314476013,
      "learning_rate": 0.00014807242271215666,
      "loss": 0.3854,
      "step": 2113
    },
    {
      "epoch": 0.7806499261447563,
      "grad_norm": 0.24413622915744781,
      "learning_rate": 0.00014804778913659318,
      "loss": 0.2898,
      "step": 2114
    },
    {
      "epoch": 0.7810192023633677,
      "grad_norm": 0.27109289169311523,
      "learning_rate": 0.0001480231555610297,
      "loss": 0.2891,
      "step": 2115
    },
    {
      "epoch": 0.7813884785819794,
      "grad_norm": 0.31798434257507324,
      "learning_rate": 0.0001479985219854662,
      "loss": 0.3494,
      "step": 2116
    },
    {
      "epoch": 0.7817577548005908,
      "grad_norm": 0.298524409532547,
      "learning_rate": 0.0001479738884099027,
      "loss": 0.3293,
      "step": 2117
    },
    {
      "epoch": 0.7821270310192023,
      "grad_norm": 0.3131650388240814,
      "learning_rate": 0.0001479492548343392,
      "loss": 0.3088,
      "step": 2118
    },
    {
      "epoch": 0.7824963072378139,
      "grad_norm": 0.2757169008255005,
      "learning_rate": 0.00014792462125877572,
      "loss": 0.2962,
      "step": 2119
    },
    {
      "epoch": 0.7828655834564254,
      "grad_norm": 0.3562310039997101,
      "learning_rate": 0.00014789998768321224,
      "loss": 0.2901,
      "step": 2120
    },
    {
      "epoch": 0.7832348596750369,
      "grad_norm": 0.30650612711906433,
      "learning_rate": 0.00014787535410764872,
      "loss": 0.3469,
      "step": 2121
    },
    {
      "epoch": 0.7836041358936484,
      "grad_norm": 0.3404449224472046,
      "learning_rate": 0.00014785072053208524,
      "loss": 0.2914,
      "step": 2122
    },
    {
      "epoch": 0.78397341211226,
      "grad_norm": 0.3603094220161438,
      "learning_rate": 0.00014782608695652173,
      "loss": 0.429,
      "step": 2123
    },
    {
      "epoch": 0.7843426883308715,
      "grad_norm": 0.2617523670196533,
      "learning_rate": 0.00014780145338095827,
      "loss": 0.2303,
      "step": 2124
    },
    {
      "epoch": 0.784711964549483,
      "grad_norm": 0.3045143783092499,
      "learning_rate": 0.00014777681980539476,
      "loss": 0.3371,
      "step": 2125
    },
    {
      "epoch": 0.7850812407680945,
      "grad_norm": 0.25733378529548645,
      "learning_rate": 0.00014775218622983127,
      "loss": 0.2849,
      "step": 2126
    },
    {
      "epoch": 0.7854505169867061,
      "grad_norm": 0.26336920261383057,
      "learning_rate": 0.00014772755265426776,
      "loss": 0.3082,
      "step": 2127
    },
    {
      "epoch": 0.7858197932053176,
      "grad_norm": 0.23966217041015625,
      "learning_rate": 0.00014770291907870427,
      "loss": 0.2412,
      "step": 2128
    },
    {
      "epoch": 0.7861890694239291,
      "grad_norm": 0.34771105647087097,
      "learning_rate": 0.0001476782855031408,
      "loss": 0.3221,
      "step": 2129
    },
    {
      "epoch": 0.7865583456425406,
      "grad_norm": 0.26496586203575134,
      "learning_rate": 0.0001476536519275773,
      "loss": 0.2855,
      "step": 2130
    },
    {
      "epoch": 0.7869276218611522,
      "grad_norm": 0.2909802794456482,
      "learning_rate": 0.0001476290183520138,
      "loss": 0.324,
      "step": 2131
    },
    {
      "epoch": 0.7872968980797637,
      "grad_norm": 0.24908676743507385,
      "learning_rate": 0.0001476043847764503,
      "loss": 0.2918,
      "step": 2132
    },
    {
      "epoch": 0.7876661742983752,
      "grad_norm": 0.3057284653186798,
      "learning_rate": 0.00014757975120088682,
      "loss": 0.3545,
      "step": 2133
    },
    {
      "epoch": 0.7880354505169868,
      "grad_norm": 0.32036465406417847,
      "learning_rate": 0.00014755511762532333,
      "loss": 0.3512,
      "step": 2134
    },
    {
      "epoch": 0.7884047267355982,
      "grad_norm": 0.32289084792137146,
      "learning_rate": 0.00014753048404975982,
      "loss": 0.3814,
      "step": 2135
    },
    {
      "epoch": 0.7887740029542097,
      "grad_norm": 0.27264589071273804,
      "learning_rate": 0.00014750585047419634,
      "loss": 0.2964,
      "step": 2136
    },
    {
      "epoch": 0.7891432791728212,
      "grad_norm": 0.3594219386577606,
      "learning_rate": 0.00014748121689863282,
      "loss": 0.3189,
      "step": 2137
    },
    {
      "epoch": 0.7895125553914328,
      "grad_norm": 0.23406580090522766,
      "learning_rate": 0.00014745658332306936,
      "loss": 0.2497,
      "step": 2138
    },
    {
      "epoch": 0.7898818316100443,
      "grad_norm": 0.24143576622009277,
      "learning_rate": 0.00014743194974750585,
      "loss": 0.2709,
      "step": 2139
    },
    {
      "epoch": 0.7902511078286558,
      "grad_norm": 0.23684187233448029,
      "learning_rate": 0.00014740731617194237,
      "loss": 0.2494,
      "step": 2140
    },
    {
      "epoch": 0.7906203840472673,
      "grad_norm": 0.3377109467983246,
      "learning_rate": 0.00014738268259637885,
      "loss": 0.3863,
      "step": 2141
    },
    {
      "epoch": 0.7909896602658789,
      "grad_norm": 0.4215676188468933,
      "learning_rate": 0.00014735804902081537,
      "loss": 0.3644,
      "step": 2142
    },
    {
      "epoch": 0.7913589364844904,
      "grad_norm": 0.3114874064922333,
      "learning_rate": 0.00014733341544525188,
      "loss": 0.3626,
      "step": 2143
    },
    {
      "epoch": 0.7917282127031019,
      "grad_norm": 0.3052099347114563,
      "learning_rate": 0.0001473087818696884,
      "loss": 0.3348,
      "step": 2144
    },
    {
      "epoch": 0.7920974889217134,
      "grad_norm": 0.24991333484649658,
      "learning_rate": 0.00014728414829412489,
      "loss": 0.2905,
      "step": 2145
    },
    {
      "epoch": 0.792466765140325,
      "grad_norm": 0.2939784824848175,
      "learning_rate": 0.0001472595147185614,
      "loss": 0.2834,
      "step": 2146
    },
    {
      "epoch": 0.7928360413589365,
      "grad_norm": 0.24958360195159912,
      "learning_rate": 0.00014723488114299791,
      "loss": 0.2484,
      "step": 2147
    },
    {
      "epoch": 0.793205317577548,
      "grad_norm": 0.2922380566596985,
      "learning_rate": 0.00014721024756743443,
      "loss": 0.2775,
      "step": 2148
    },
    {
      "epoch": 0.7935745937961596,
      "grad_norm": 0.3258068561553955,
      "learning_rate": 0.00014718561399187092,
      "loss": 0.2699,
      "step": 2149
    },
    {
      "epoch": 0.7939438700147711,
      "grad_norm": 0.3072319030761719,
      "learning_rate": 0.00014716098041630743,
      "loss": 0.3221,
      "step": 2150
    },
    {
      "epoch": 0.7939438700147711,
      "eval_loss": 0.29739075899124146,
      "eval_runtime": 5.8631,
      "eval_samples_per_second": 8.528,
      "eval_steps_per_second": 1.194,
      "step": 2150
    },
    {
      "epoch": 0.7943131462333826,
      "grad_norm": 0.28948506712913513,
      "learning_rate": 0.00014713634684074392,
      "loss": 0.2879,
      "step": 2151
    },
    {
      "epoch": 0.794682422451994,
      "grad_norm": 0.338868647813797,
      "learning_rate": 0.00014711171326518046,
      "loss": 0.3321,
      "step": 2152
    },
    {
      "epoch": 0.7950516986706057,
      "grad_norm": 0.32467564940452576,
      "learning_rate": 0.00014708707968961695,
      "loss": 0.3321,
      "step": 2153
    },
    {
      "epoch": 0.7954209748892171,
      "grad_norm": 0.3621566891670227,
      "learning_rate": 0.00014706244611405346,
      "loss": 0.4035,
      "step": 2154
    },
    {
      "epoch": 0.7957902511078286,
      "grad_norm": 0.3736126720905304,
      "learning_rate": 0.00014703781253848995,
      "loss": 0.3209,
      "step": 2155
    },
    {
      "epoch": 0.7961595273264401,
      "grad_norm": 0.2838221788406372,
      "learning_rate": 0.00014701317896292647,
      "loss": 0.2987,
      "step": 2156
    },
    {
      "epoch": 0.7965288035450517,
      "grad_norm": 0.3212393522262573,
      "learning_rate": 0.00014698854538736298,
      "loss": 0.3889,
      "step": 2157
    },
    {
      "epoch": 0.7968980797636632,
      "grad_norm": 0.31640344858169556,
      "learning_rate": 0.0001469639118117995,
      "loss": 0.3563,
      "step": 2158
    },
    {
      "epoch": 0.7972673559822747,
      "grad_norm": 0.29704806208610535,
      "learning_rate": 0.00014693927823623598,
      "loss": 0.3044,
      "step": 2159
    },
    {
      "epoch": 0.7976366322008862,
      "grad_norm": 0.34912556409835815,
      "learning_rate": 0.0001469146446606725,
      "loss": 0.3201,
      "step": 2160
    },
    {
      "epoch": 0.7980059084194978,
      "grad_norm": 0.2666184604167938,
      "learning_rate": 0.000146890011085109,
      "loss": 0.346,
      "step": 2161
    },
    {
      "epoch": 0.7983751846381093,
      "grad_norm": 0.29834869503974915,
      "learning_rate": 0.00014686537750954553,
      "loss": 0.3404,
      "step": 2162
    },
    {
      "epoch": 0.7987444608567208,
      "grad_norm": 0.2953677475452423,
      "learning_rate": 0.00014684074393398201,
      "loss": 0.2415,
      "step": 2163
    },
    {
      "epoch": 0.7991137370753324,
      "grad_norm": 0.31550806760787964,
      "learning_rate": 0.00014681611035841853,
      "loss": 0.2797,
      "step": 2164
    },
    {
      "epoch": 0.7994830132939439,
      "grad_norm": 0.2836500108242035,
      "learning_rate": 0.00014679147678285504,
      "loss": 0.3396,
      "step": 2165
    },
    {
      "epoch": 0.7998522895125554,
      "grad_norm": 0.2543346881866455,
      "learning_rate": 0.00014676684320729156,
      "loss": 0.3069,
      "step": 2166
    },
    {
      "epoch": 0.8002215657311669,
      "grad_norm": 0.30015766620635986,
      "learning_rate": 0.00014674220963172805,
      "loss": 0.3124,
      "step": 2167
    },
    {
      "epoch": 0.8005908419497785,
      "grad_norm": 0.2661927342414856,
      "learning_rate": 0.00014671757605616456,
      "loss": 0.3075,
      "step": 2168
    },
    {
      "epoch": 0.80096011816839,
      "grad_norm": 0.3345184326171875,
      "learning_rate": 0.00014669294248060105,
      "loss": 0.3104,
      "step": 2169
    },
    {
      "epoch": 0.8013293943870015,
      "grad_norm": 0.2684672474861145,
      "learning_rate": 0.0001466683089050376,
      "loss": 0.3331,
      "step": 2170
    },
    {
      "epoch": 0.801698670605613,
      "grad_norm": 0.2914975583553314,
      "learning_rate": 0.00014664367532947408,
      "loss": 0.3058,
      "step": 2171
    },
    {
      "epoch": 0.8020679468242246,
      "grad_norm": 0.23483692109584808,
      "learning_rate": 0.0001466190417539106,
      "loss": 0.2726,
      "step": 2172
    },
    {
      "epoch": 0.802437223042836,
      "grad_norm": 0.2587801516056061,
      "learning_rate": 0.00014659440817834708,
      "loss": 0.3331,
      "step": 2173
    },
    {
      "epoch": 0.8028064992614475,
      "grad_norm": 0.3047831058502197,
      "learning_rate": 0.0001465697746027836,
      "loss": 0.3626,
      "step": 2174
    },
    {
      "epoch": 0.803175775480059,
      "grad_norm": 0.4038315415382385,
      "learning_rate": 0.0001465451410272201,
      "loss": 0.3259,
      "step": 2175
    },
    {
      "epoch": 0.8035450516986706,
      "grad_norm": 0.22119131684303284,
      "learning_rate": 0.00014652050745165662,
      "loss": 0.2991,
      "step": 2176
    },
    {
      "epoch": 0.8039143279172821,
      "grad_norm": 0.26064956188201904,
      "learning_rate": 0.0001464958738760931,
      "loss": 0.2791,
      "step": 2177
    },
    {
      "epoch": 0.8042836041358936,
      "grad_norm": 0.2967536449432373,
      "learning_rate": 0.00014647124030052962,
      "loss": 0.3199,
      "step": 2178
    },
    {
      "epoch": 0.8046528803545052,
      "grad_norm": 0.2943962812423706,
      "learning_rate": 0.00014644660672496614,
      "loss": 0.2945,
      "step": 2179
    },
    {
      "epoch": 0.8050221565731167,
      "grad_norm": 0.2509021759033203,
      "learning_rate": 0.00014642197314940265,
      "loss": 0.3017,
      "step": 2180
    },
    {
      "epoch": 0.8053914327917282,
      "grad_norm": 0.3554409146308899,
      "learning_rate": 0.00014639733957383914,
      "loss": 0.3672,
      "step": 2181
    },
    {
      "epoch": 0.8057607090103397,
      "grad_norm": 0.2364262491464615,
      "learning_rate": 0.00014637270599827566,
      "loss": 0.2554,
      "step": 2182
    },
    {
      "epoch": 0.8061299852289513,
      "grad_norm": 0.3607995808124542,
      "learning_rate": 0.00014634807242271214,
      "loss": 0.3322,
      "step": 2183
    },
    {
      "epoch": 0.8064992614475628,
      "grad_norm": 0.2653542160987854,
      "learning_rate": 0.00014632343884714869,
      "loss": 0.2872,
      "step": 2184
    },
    {
      "epoch": 0.8068685376661743,
      "grad_norm": 0.257291316986084,
      "learning_rate": 0.00014629880527158517,
      "loss": 0.251,
      "step": 2185
    },
    {
      "epoch": 0.8072378138847858,
      "grad_norm": 0.31101787090301514,
      "learning_rate": 0.0001462741716960217,
      "loss": 0.2955,
      "step": 2186
    },
    {
      "epoch": 0.8076070901033974,
      "grad_norm": 0.23836660385131836,
      "learning_rate": 0.00014624953812045818,
      "loss": 0.2923,
      "step": 2187
    },
    {
      "epoch": 0.8079763663220089,
      "grad_norm": 0.384665310382843,
      "learning_rate": 0.0001462249045448947,
      "loss": 0.3695,
      "step": 2188
    },
    {
      "epoch": 0.8083456425406204,
      "grad_norm": 0.3111051321029663,
      "learning_rate": 0.0001462002709693312,
      "loss": 0.3134,
      "step": 2189
    },
    {
      "epoch": 0.808714918759232,
      "grad_norm": 0.270060658454895,
      "learning_rate": 0.00014617563739376772,
      "loss": 0.258,
      "step": 2190
    },
    {
      "epoch": 0.8090841949778435,
      "grad_norm": 0.2257768213748932,
      "learning_rate": 0.0001461510038182042,
      "loss": 0.2212,
      "step": 2191
    },
    {
      "epoch": 0.8094534711964549,
      "grad_norm": 0.2495872676372528,
      "learning_rate": 0.00014612637024264072,
      "loss": 0.2808,
      "step": 2192
    },
    {
      "epoch": 0.8098227474150664,
      "grad_norm": 0.29808223247528076,
      "learning_rate": 0.00014610173666707724,
      "loss": 0.259,
      "step": 2193
    },
    {
      "epoch": 0.810192023633678,
      "grad_norm": 0.267164409160614,
      "learning_rate": 0.00014607710309151375,
      "loss": 0.3281,
      "step": 2194
    },
    {
      "epoch": 0.8105612998522895,
      "grad_norm": 0.2977560758590698,
      "learning_rate": 0.00014605246951595024,
      "loss": 0.3097,
      "step": 2195
    },
    {
      "epoch": 0.810930576070901,
      "grad_norm": 0.28259027004241943,
      "learning_rate": 0.00014602783594038675,
      "loss": 0.3511,
      "step": 2196
    },
    {
      "epoch": 0.8112998522895125,
      "grad_norm": 0.3065144419670105,
      "learning_rate": 0.00014600320236482327,
      "loss": 0.3589,
      "step": 2197
    },
    {
      "epoch": 0.8116691285081241,
      "grad_norm": 0.24390994012355804,
      "learning_rate": 0.00014597856878925978,
      "loss": 0.2485,
      "step": 2198
    },
    {
      "epoch": 0.8120384047267356,
      "grad_norm": 0.25467053055763245,
      "learning_rate": 0.00014595393521369627,
      "loss": 0.2408,
      "step": 2199
    },
    {
      "epoch": 0.8124076809453471,
      "grad_norm": 0.3040908873081207,
      "learning_rate": 0.00014592930163813278,
      "loss": 0.3687,
      "step": 2200
    },
    {
      "epoch": 0.8124076809453471,
      "eval_loss": 0.2931758761405945,
      "eval_runtime": 5.8676,
      "eval_samples_per_second": 8.521,
      "eval_steps_per_second": 1.193,
      "step": 2200
    },
    {
      "epoch": 0.8127769571639586,
      "grad_norm": 0.2869749665260315,
      "learning_rate": 0.00014590466806256927,
      "loss": 0.2708,
      "step": 2201
    },
    {
      "epoch": 0.8131462333825702,
      "grad_norm": 0.22452616691589355,
      "learning_rate": 0.0001458800344870058,
      "loss": 0.2482,
      "step": 2202
    },
    {
      "epoch": 0.8135155096011817,
      "grad_norm": 0.22195591032505035,
      "learning_rate": 0.0001458554009114423,
      "loss": 0.2664,
      "step": 2203
    },
    {
      "epoch": 0.8138847858197932,
      "grad_norm": 0.32908597588539124,
      "learning_rate": 0.00014583076733587882,
      "loss": 0.24,
      "step": 2204
    },
    {
      "epoch": 0.8142540620384048,
      "grad_norm": 0.29719024896621704,
      "learning_rate": 0.0001458061337603153,
      "loss": 0.3623,
      "step": 2205
    },
    {
      "epoch": 0.8146233382570163,
      "grad_norm": 0.28195294737815857,
      "learning_rate": 0.00014578150018475182,
      "loss": 0.3519,
      "step": 2206
    },
    {
      "epoch": 0.8149926144756278,
      "grad_norm": 0.2806910276412964,
      "learning_rate": 0.00014575686660918833,
      "loss": 0.2922,
      "step": 2207
    },
    {
      "epoch": 0.8153618906942393,
      "grad_norm": 0.31184086203575134,
      "learning_rate": 0.00014573223303362485,
      "loss": 0.3434,
      "step": 2208
    },
    {
      "epoch": 0.8157311669128509,
      "grad_norm": 0.2777872383594513,
      "learning_rate": 0.00014570759945806133,
      "loss": 0.3073,
      "step": 2209
    },
    {
      "epoch": 0.8161004431314623,
      "grad_norm": 0.24332855641841888,
      "learning_rate": 0.00014568296588249785,
      "loss": 0.2917,
      "step": 2210
    },
    {
      "epoch": 0.8164697193500738,
      "grad_norm": 0.23340560495853424,
      "learning_rate": 0.00014565833230693436,
      "loss": 0.2574,
      "step": 2211
    },
    {
      "epoch": 0.8168389955686853,
      "grad_norm": 0.2942739725112915,
      "learning_rate": 0.00014563369873137088,
      "loss": 0.3229,
      "step": 2212
    },
    {
      "epoch": 0.8172082717872969,
      "grad_norm": 0.283641517162323,
      "learning_rate": 0.00014560906515580737,
      "loss": 0.3321,
      "step": 2213
    },
    {
      "epoch": 0.8175775480059084,
      "grad_norm": 0.28562578558921814,
      "learning_rate": 0.00014558443158024388,
      "loss": 0.3235,
      "step": 2214
    },
    {
      "epoch": 0.8179468242245199,
      "grad_norm": 0.3028421700000763,
      "learning_rate": 0.00014555979800468037,
      "loss": 0.3386,
      "step": 2215
    },
    {
      "epoch": 0.8183161004431314,
      "grad_norm": 0.26572704315185547,
      "learning_rate": 0.0001455351644291169,
      "loss": 0.2618,
      "step": 2216
    },
    {
      "epoch": 0.818685376661743,
      "grad_norm": 0.3396584987640381,
      "learning_rate": 0.0001455105308535534,
      "loss": 0.278,
      "step": 2217
    },
    {
      "epoch": 0.8190546528803545,
      "grad_norm": 0.24953824281692505,
      "learning_rate": 0.0001454858972779899,
      "loss": 0.2935,
      "step": 2218
    },
    {
      "epoch": 0.819423929098966,
      "grad_norm": 0.27380311489105225,
      "learning_rate": 0.0001454612637024264,
      "loss": 0.3147,
      "step": 2219
    },
    {
      "epoch": 0.8197932053175776,
      "grad_norm": 0.33168891072273254,
      "learning_rate": 0.00014543663012686291,
      "loss": 0.3503,
      "step": 2220
    },
    {
      "epoch": 0.8201624815361891,
      "grad_norm": 0.24705637991428375,
      "learning_rate": 0.00014541199655129943,
      "loss": 0.2921,
      "step": 2221
    },
    {
      "epoch": 0.8205317577548006,
      "grad_norm": 0.25001150369644165,
      "learning_rate": 0.00014538736297573594,
      "loss": 0.2642,
      "step": 2222
    },
    {
      "epoch": 0.8209010339734121,
      "grad_norm": 0.27988019585609436,
      "learning_rate": 0.00014536272940017243,
      "loss": 0.3064,
      "step": 2223
    },
    {
      "epoch": 0.8212703101920237,
      "grad_norm": 0.267130047082901,
      "learning_rate": 0.00014533809582460895,
      "loss": 0.2935,
      "step": 2224
    },
    {
      "epoch": 0.8216395864106352,
      "grad_norm": 0.2690856456756592,
      "learning_rate": 0.00014531346224904546,
      "loss": 0.2765,
      "step": 2225
    },
    {
      "epoch": 0.8220088626292467,
      "grad_norm": 0.2621283233165741,
      "learning_rate": 0.00014528882867348198,
      "loss": 0.3099,
      "step": 2226
    },
    {
      "epoch": 0.8223781388478582,
      "grad_norm": 0.30594637989997864,
      "learning_rate": 0.00014526419509791846,
      "loss": 0.3001,
      "step": 2227
    },
    {
      "epoch": 0.8227474150664698,
      "grad_norm": 0.254181444644928,
      "learning_rate": 0.00014523956152235498,
      "loss": 0.2717,
      "step": 2228
    },
    {
      "epoch": 0.8231166912850812,
      "grad_norm": 0.26248815655708313,
      "learning_rate": 0.0001452149279467915,
      "loss": 0.2524,
      "step": 2229
    },
    {
      "epoch": 0.8234859675036927,
      "grad_norm": 0.2587265968322754,
      "learning_rate": 0.000145190294371228,
      "loss": 0.314,
      "step": 2230
    },
    {
      "epoch": 0.8238552437223042,
      "grad_norm": 0.2941606044769287,
      "learning_rate": 0.0001451656607956645,
      "loss": 0.339,
      "step": 2231
    },
    {
      "epoch": 0.8242245199409158,
      "grad_norm": 0.2971184253692627,
      "learning_rate": 0.000145141027220101,
      "loss": 0.2879,
      "step": 2232
    },
    {
      "epoch": 0.8245937961595273,
      "grad_norm": 0.21398895978927612,
      "learning_rate": 0.0001451163936445375,
      "loss": 0.2217,
      "step": 2233
    },
    {
      "epoch": 0.8249630723781388,
      "grad_norm": 0.26762524247169495,
      "learning_rate": 0.00014509176006897404,
      "loss": 0.2703,
      "step": 2234
    },
    {
      "epoch": 0.8253323485967504,
      "grad_norm": 0.2811284363269806,
      "learning_rate": 0.00014506712649341053,
      "loss": 0.3259,
      "step": 2235
    },
    {
      "epoch": 0.8257016248153619,
      "grad_norm": 0.33781254291534424,
      "learning_rate": 0.00014504249291784704,
      "loss": 0.348,
      "step": 2236
    },
    {
      "epoch": 0.8260709010339734,
      "grad_norm": 0.32253655791282654,
      "learning_rate": 0.00014501785934228353,
      "loss": 0.3812,
      "step": 2237
    },
    {
      "epoch": 0.8264401772525849,
      "grad_norm": 0.30919092893600464,
      "learning_rate": 0.00014499322576672004,
      "loss": 0.3099,
      "step": 2238
    },
    {
      "epoch": 0.8268094534711965,
      "grad_norm": 0.3416600823402405,
      "learning_rate": 0.00014496859219115656,
      "loss": 0.346,
      "step": 2239
    },
    {
      "epoch": 0.827178729689808,
      "grad_norm": 0.24034634232521057,
      "learning_rate": 0.00014494395861559307,
      "loss": 0.3034,
      "step": 2240
    },
    {
      "epoch": 0.8275480059084195,
      "grad_norm": 0.2517634332180023,
      "learning_rate": 0.00014491932504002956,
      "loss": 0.2773,
      "step": 2241
    },
    {
      "epoch": 0.827917282127031,
      "grad_norm": 0.2968411445617676,
      "learning_rate": 0.00014489469146446607,
      "loss": 0.3548,
      "step": 2242
    },
    {
      "epoch": 0.8282865583456426,
      "grad_norm": 0.33049216866493225,
      "learning_rate": 0.0001448700578889026,
      "loss": 0.3236,
      "step": 2243
    },
    {
      "epoch": 0.8286558345642541,
      "grad_norm": 0.25116103887557983,
      "learning_rate": 0.0001448454243133391,
      "loss": 0.3276,
      "step": 2244
    },
    {
      "epoch": 0.8290251107828656,
      "grad_norm": 0.29226842522621155,
      "learning_rate": 0.0001448207907377756,
      "loss": 0.3263,
      "step": 2245
    },
    {
      "epoch": 0.829394387001477,
      "grad_norm": 0.22805069386959076,
      "learning_rate": 0.0001447961571622121,
      "loss": 0.2605,
      "step": 2246
    },
    {
      "epoch": 0.8297636632200887,
      "grad_norm": 0.2949386239051819,
      "learning_rate": 0.0001447715235866486,
      "loss": 0.3113,
      "step": 2247
    },
    {
      "epoch": 0.8301329394387001,
      "grad_norm": 0.2299480140209198,
      "learning_rate": 0.00014474689001108513,
      "loss": 0.2869,
      "step": 2248
    },
    {
      "epoch": 0.8305022156573116,
      "grad_norm": 0.2542758285999298,
      "learning_rate": 0.00014472225643552162,
      "loss": 0.3202,
      "step": 2249
    },
    {
      "epoch": 0.8308714918759232,
      "grad_norm": 0.2857692241668701,
      "learning_rate": 0.00014469762285995814,
      "loss": 0.2913,
      "step": 2250
    },
    {
      "epoch": 0.8308714918759232,
      "eval_loss": 0.2872462570667267,
      "eval_runtime": 5.8643,
      "eval_samples_per_second": 8.526,
      "eval_steps_per_second": 1.194,
      "step": 2250
    },
    {
      "epoch": 0.8312407680945347,
      "grad_norm": 0.29522812366485596,
      "learning_rate": 0.00014467298928439462,
      "loss": 0.3988,
      "step": 2251
    },
    {
      "epoch": 0.8316100443131462,
      "grad_norm": 0.28120508790016174,
      "learning_rate": 0.00014464835570883114,
      "loss": 0.3674,
      "step": 2252
    },
    {
      "epoch": 0.8319793205317577,
      "grad_norm": 0.27494558691978455,
      "learning_rate": 0.00014462372213326765,
      "loss": 0.3127,
      "step": 2253
    },
    {
      "epoch": 0.8323485967503693,
      "grad_norm": 0.24010087549686432,
      "learning_rate": 0.00014459908855770417,
      "loss": 0.259,
      "step": 2254
    },
    {
      "epoch": 0.8327178729689808,
      "grad_norm": 0.2521667182445526,
      "learning_rate": 0.00014457445498214066,
      "loss": 0.263,
      "step": 2255
    },
    {
      "epoch": 0.8330871491875923,
      "grad_norm": 0.24542008340358734,
      "learning_rate": 0.00014454982140657717,
      "loss": 0.2655,
      "step": 2256
    },
    {
      "epoch": 0.8334564254062038,
      "grad_norm": 0.26360607147216797,
      "learning_rate": 0.00014452518783101369,
      "loss": 0.2994,
      "step": 2257
    },
    {
      "epoch": 0.8338257016248154,
      "grad_norm": 0.33817002177238464,
      "learning_rate": 0.0001445005542554502,
      "loss": 0.3448,
      "step": 2258
    },
    {
      "epoch": 0.8341949778434269,
      "grad_norm": 0.35080987215042114,
      "learning_rate": 0.0001444759206798867,
      "loss": 0.3449,
      "step": 2259
    },
    {
      "epoch": 0.8345642540620384,
      "grad_norm": 0.2686460316181183,
      "learning_rate": 0.0001444512871043232,
      "loss": 0.284,
      "step": 2260
    },
    {
      "epoch": 0.8349335302806499,
      "grad_norm": 0.3396205008029938,
      "learning_rate": 0.00014442665352875972,
      "loss": 0.3735,
      "step": 2261
    },
    {
      "epoch": 0.8353028064992615,
      "grad_norm": 0.25770655274391174,
      "learning_rate": 0.00014440201995319623,
      "loss": 0.2561,
      "step": 2262
    },
    {
      "epoch": 0.835672082717873,
      "grad_norm": 0.23628605902194977,
      "learning_rate": 0.00014437738637763272,
      "loss": 0.2772,
      "step": 2263
    },
    {
      "epoch": 0.8360413589364845,
      "grad_norm": 0.24436742067337036,
      "learning_rate": 0.00014435275280206923,
      "loss": 0.2348,
      "step": 2264
    },
    {
      "epoch": 0.8364106351550961,
      "grad_norm": 0.2738080620765686,
      "learning_rate": 0.00014432811922650572,
      "loss": 0.2965,
      "step": 2265
    },
    {
      "epoch": 0.8367799113737076,
      "grad_norm": 0.26231417059898376,
      "learning_rate": 0.00014430348565094226,
      "loss": 0.2665,
      "step": 2266
    },
    {
      "epoch": 0.837149187592319,
      "grad_norm": 0.30316510796546936,
      "learning_rate": 0.00014427885207537875,
      "loss": 0.3225,
      "step": 2267
    },
    {
      "epoch": 0.8375184638109305,
      "grad_norm": 0.2635595500469208,
      "learning_rate": 0.00014425421849981526,
      "loss": 0.2904,
      "step": 2268
    },
    {
      "epoch": 0.8378877400295421,
      "grad_norm": 0.24698545038700104,
      "learning_rate": 0.00014422958492425175,
      "loss": 0.2781,
      "step": 2269
    },
    {
      "epoch": 0.8382570162481536,
      "grad_norm": 0.2639318108558655,
      "learning_rate": 0.00014420495134868827,
      "loss": 0.3278,
      "step": 2270
    },
    {
      "epoch": 0.8386262924667651,
      "grad_norm": 0.3104601502418518,
      "learning_rate": 0.00014418031777312478,
      "loss": 0.2803,
      "step": 2271
    },
    {
      "epoch": 0.8389955686853766,
      "grad_norm": 0.2913442850112915,
      "learning_rate": 0.0001441556841975613,
      "loss": 0.2673,
      "step": 2272
    },
    {
      "epoch": 0.8393648449039882,
      "grad_norm": 0.3268960416316986,
      "learning_rate": 0.00014413105062199778,
      "loss": 0.311,
      "step": 2273
    },
    {
      "epoch": 0.8397341211225997,
      "grad_norm": 0.2585023045539856,
      "learning_rate": 0.0001441064170464343,
      "loss": 0.2834,
      "step": 2274
    },
    {
      "epoch": 0.8401033973412112,
      "grad_norm": 0.2671329975128174,
      "learning_rate": 0.0001440817834708708,
      "loss": 0.2926,
      "step": 2275
    },
    {
      "epoch": 0.8404726735598228,
      "grad_norm": 0.3078802824020386,
      "learning_rate": 0.00014405714989530733,
      "loss": 0.2788,
      "step": 2276
    },
    {
      "epoch": 0.8408419497784343,
      "grad_norm": 0.25752460956573486,
      "learning_rate": 0.00014403251631974382,
      "loss": 0.2948,
      "step": 2277
    },
    {
      "epoch": 0.8412112259970458,
      "grad_norm": 0.24336199462413788,
      "learning_rate": 0.00014400788274418033,
      "loss": 0.2689,
      "step": 2278
    },
    {
      "epoch": 0.8415805022156573,
      "grad_norm": 0.3261638283729553,
      "learning_rate": 0.00014398324916861682,
      "loss": 0.3442,
      "step": 2279
    },
    {
      "epoch": 0.8419497784342689,
      "grad_norm": 0.3471073508262634,
      "learning_rate": 0.00014395861559305336,
      "loss": 0.368,
      "step": 2280
    },
    {
      "epoch": 0.8423190546528804,
      "grad_norm": 0.2980787456035614,
      "learning_rate": 0.00014393398201748985,
      "loss": 0.3559,
      "step": 2281
    },
    {
      "epoch": 0.8426883308714919,
      "grad_norm": 0.29910537600517273,
      "learning_rate": 0.00014390934844192636,
      "loss": 0.2764,
      "step": 2282
    },
    {
      "epoch": 0.8430576070901034,
      "grad_norm": 0.27420300245285034,
      "learning_rate": 0.00014388471486636285,
      "loss": 0.3385,
      "step": 2283
    },
    {
      "epoch": 0.843426883308715,
      "grad_norm": 0.26107993721961975,
      "learning_rate": 0.00014386008129079936,
      "loss": 0.3044,
      "step": 2284
    },
    {
      "epoch": 0.8437961595273265,
      "grad_norm": 0.23961122334003448,
      "learning_rate": 0.00014383544771523588,
      "loss": 0.2556,
      "step": 2285
    },
    {
      "epoch": 0.8441654357459379,
      "grad_norm": 0.2519873380661011,
      "learning_rate": 0.0001438108141396724,
      "loss": 0.2721,
      "step": 2286
    },
    {
      "epoch": 0.8445347119645494,
      "grad_norm": 0.2999603748321533,
      "learning_rate": 0.00014378618056410888,
      "loss": 0.3113,
      "step": 2287
    },
    {
      "epoch": 0.844903988183161,
      "grad_norm": 0.24635626375675201,
      "learning_rate": 0.0001437615469885454,
      "loss": 0.2987,
      "step": 2288
    },
    {
      "epoch": 0.8452732644017725,
      "grad_norm": 0.26801541447639465,
      "learning_rate": 0.0001437369134129819,
      "loss": 0.3288,
      "step": 2289
    },
    {
      "epoch": 0.845642540620384,
      "grad_norm": 0.2706718146800995,
      "learning_rate": 0.00014371227983741842,
      "loss": 0.2828,
      "step": 2290
    },
    {
      "epoch": 0.8460118168389956,
      "grad_norm": 0.31260946393013,
      "learning_rate": 0.0001436876462618549,
      "loss": 0.3028,
      "step": 2291
    },
    {
      "epoch": 0.8463810930576071,
      "grad_norm": 0.26500388979911804,
      "learning_rate": 0.00014366301268629143,
      "loss": 0.2812,
      "step": 2292
    },
    {
      "epoch": 0.8467503692762186,
      "grad_norm": 0.28190380334854126,
      "learning_rate": 0.00014363837911072794,
      "loss": 0.3013,
      "step": 2293
    },
    {
      "epoch": 0.8471196454948301,
      "grad_norm": 0.30988219380378723,
      "learning_rate": 0.00014361374553516446,
      "loss": 0.3557,
      "step": 2294
    },
    {
      "epoch": 0.8474889217134417,
      "grad_norm": 0.3184913992881775,
      "learning_rate": 0.00014358911195960094,
      "loss": 0.3377,
      "step": 2295
    },
    {
      "epoch": 0.8478581979320532,
      "grad_norm": 0.2852730453014374,
      "learning_rate": 0.00014356447838403746,
      "loss": 0.3358,
      "step": 2296
    },
    {
      "epoch": 0.8482274741506647,
      "grad_norm": 0.3184800148010254,
      "learning_rate": 0.00014353984480847395,
      "loss": 0.2749,
      "step": 2297
    },
    {
      "epoch": 0.8485967503692762,
      "grad_norm": 0.31259408593177795,
      "learning_rate": 0.0001435152112329105,
      "loss": 0.3372,
      "step": 2298
    },
    {
      "epoch": 0.8489660265878878,
      "grad_norm": 0.2789030075073242,
      "learning_rate": 0.00014349057765734697,
      "loss": 0.3345,
      "step": 2299
    },
    {
      "epoch": 0.8493353028064993,
      "grad_norm": 0.290223091840744,
      "learning_rate": 0.0001434659440817835,
      "loss": 0.3217,
      "step": 2300
    },
    {
      "epoch": 0.8493353028064993,
      "eval_loss": 0.2873377799987793,
      "eval_runtime": 5.8564,
      "eval_samples_per_second": 8.538,
      "eval_steps_per_second": 1.195,
      "step": 2300
    },
    {
      "epoch": 0.8497045790251108,
      "grad_norm": 0.3126521706581116,
      "learning_rate": 0.00014344131050621998,
      "loss": 0.3818,
      "step": 2301
    },
    {
      "epoch": 0.8500738552437223,
      "grad_norm": 0.33014044165611267,
      "learning_rate": 0.0001434166769306565,
      "loss": 0.3932,
      "step": 2302
    },
    {
      "epoch": 0.8504431314623339,
      "grad_norm": 0.3226761221885681,
      "learning_rate": 0.000143392043355093,
      "loss": 0.321,
      "step": 2303
    },
    {
      "epoch": 0.8508124076809453,
      "grad_norm": 0.2537625730037689,
      "learning_rate": 0.00014336740977952952,
      "loss": 0.2429,
      "step": 2304
    },
    {
      "epoch": 0.8511816838995568,
      "grad_norm": 0.274652898311615,
      "learning_rate": 0.000143342776203966,
      "loss": 0.2672,
      "step": 2305
    },
    {
      "epoch": 0.8515509601181684,
      "grad_norm": 0.3447267711162567,
      "learning_rate": 0.00014331814262840252,
      "loss": 0.3365,
      "step": 2306
    },
    {
      "epoch": 0.8519202363367799,
      "grad_norm": 0.23127812147140503,
      "learning_rate": 0.00014329350905283904,
      "loss": 0.2598,
      "step": 2307
    },
    {
      "epoch": 0.8522895125553914,
      "grad_norm": 0.2893903851509094,
      "learning_rate": 0.00014326887547727555,
      "loss": 0.3396,
      "step": 2308
    },
    {
      "epoch": 0.8526587887740029,
      "grad_norm": 0.25716060400009155,
      "learning_rate": 0.00014324424190171204,
      "loss": 0.2831,
      "step": 2309
    },
    {
      "epoch": 0.8530280649926145,
      "grad_norm": 0.27485719323158264,
      "learning_rate": 0.00014321960832614855,
      "loss": 0.2711,
      "step": 2310
    },
    {
      "epoch": 0.853397341211226,
      "grad_norm": 0.2990472912788391,
      "learning_rate": 0.00014319497475058504,
      "loss": 0.3362,
      "step": 2311
    },
    {
      "epoch": 0.8537666174298375,
      "grad_norm": 0.27512043714523315,
      "learning_rate": 0.00014317034117502158,
      "loss": 0.3013,
      "step": 2312
    },
    {
      "epoch": 0.854135893648449,
      "grad_norm": 0.22718726098537445,
      "learning_rate": 0.00014314570759945807,
      "loss": 0.2655,
      "step": 2313
    },
    {
      "epoch": 0.8545051698670606,
      "grad_norm": 0.3475792407989502,
      "learning_rate": 0.00014312107402389459,
      "loss": 0.3464,
      "step": 2314
    },
    {
      "epoch": 0.8548744460856721,
      "grad_norm": 0.2669030427932739,
      "learning_rate": 0.00014309644044833107,
      "loss": 0.3412,
      "step": 2315
    },
    {
      "epoch": 0.8552437223042836,
      "grad_norm": 0.2855254113674164,
      "learning_rate": 0.0001430718068727676,
      "loss": 0.2962,
      "step": 2316
    },
    {
      "epoch": 0.8556129985228951,
      "grad_norm": 0.22337274253368378,
      "learning_rate": 0.0001430471732972041,
      "loss": 0.2728,
      "step": 2317
    },
    {
      "epoch": 0.8559822747415067,
      "grad_norm": 0.3366328477859497,
      "learning_rate": 0.00014302253972164062,
      "loss": 0.2873,
      "step": 2318
    },
    {
      "epoch": 0.8563515509601182,
      "grad_norm": 0.2495029717683792,
      "learning_rate": 0.0001429979061460771,
      "loss": 0.3059,
      "step": 2319
    },
    {
      "epoch": 0.8567208271787297,
      "grad_norm": 0.2128600776195526,
      "learning_rate": 0.00014297327257051362,
      "loss": 0.2419,
      "step": 2320
    },
    {
      "epoch": 0.8570901033973413,
      "grad_norm": 0.31186309456825256,
      "learning_rate": 0.00014294863899495013,
      "loss": 0.3131,
      "step": 2321
    },
    {
      "epoch": 0.8574593796159528,
      "grad_norm": 0.24287007749080658,
      "learning_rate": 0.00014292400541938665,
      "loss": 0.2501,
      "step": 2322
    },
    {
      "epoch": 0.8578286558345642,
      "grad_norm": 0.26953041553497314,
      "learning_rate": 0.00014289937184382314,
      "loss": 0.2907,
      "step": 2323
    },
    {
      "epoch": 0.8581979320531757,
      "grad_norm": 0.2574095129966736,
      "learning_rate": 0.00014287473826825965,
      "loss": 0.2775,
      "step": 2324
    },
    {
      "epoch": 0.8585672082717873,
      "grad_norm": 0.33651429414749146,
      "learning_rate": 0.00014285010469269614,
      "loss": 0.2649,
      "step": 2325
    },
    {
      "epoch": 0.8589364844903988,
      "grad_norm": 0.2808817923069,
      "learning_rate": 0.00014282547111713265,
      "loss": 0.3235,
      "step": 2326
    },
    {
      "epoch": 0.8593057607090103,
      "grad_norm": 0.2517601549625397,
      "learning_rate": 0.00014280083754156917,
      "loss": 0.2747,
      "step": 2327
    },
    {
      "epoch": 0.8596750369276218,
      "grad_norm": 0.4052937626838684,
      "learning_rate": 0.00014277620396600566,
      "loss": 0.361,
      "step": 2328
    },
    {
      "epoch": 0.8600443131462334,
      "grad_norm": 0.294210284948349,
      "learning_rate": 0.00014275157039044217,
      "loss": 0.2779,
      "step": 2329
    },
    {
      "epoch": 0.8604135893648449,
      "grad_norm": 0.30568668246269226,
      "learning_rate": 0.00014272693681487868,
      "loss": 0.2896,
      "step": 2330
    },
    {
      "epoch": 0.8607828655834564,
      "grad_norm": 0.3126921057701111,
      "learning_rate": 0.0001427023032393152,
      "loss": 0.3514,
      "step": 2331
    },
    {
      "epoch": 0.8611521418020679,
      "grad_norm": 0.29571497440338135,
      "learning_rate": 0.0001426776696637517,
      "loss": 0.3569,
      "step": 2332
    },
    {
      "epoch": 0.8615214180206795,
      "grad_norm": 0.22604425251483917,
      "learning_rate": 0.0001426530360881882,
      "loss": 0.2204,
      "step": 2333
    },
    {
      "epoch": 0.861890694239291,
      "grad_norm": 0.31958821415901184,
      "learning_rate": 0.00014262840251262472,
      "loss": 0.3159,
      "step": 2334
    },
    {
      "epoch": 0.8622599704579025,
      "grad_norm": 0.24583856761455536,
      "learning_rate": 0.00014260376893706123,
      "loss": 0.2815,
      "step": 2335
    },
    {
      "epoch": 0.8626292466765141,
      "grad_norm": 0.263694167137146,
      "learning_rate": 0.00014257913536149772,
      "loss": 0.303,
      "step": 2336
    },
    {
      "epoch": 0.8629985228951256,
      "grad_norm": 0.37396731972694397,
      "learning_rate": 0.00014255450178593423,
      "loss": 0.3883,
      "step": 2337
    },
    {
      "epoch": 0.8633677991137371,
      "grad_norm": 0.2750912010669708,
      "learning_rate": 0.00014252986821037072,
      "loss": 0.2583,
      "step": 2338
    },
    {
      "epoch": 0.8637370753323486,
      "grad_norm": 0.25109225511550903,
      "learning_rate": 0.00014250523463480726,
      "loss": 0.2766,
      "step": 2339
    },
    {
      "epoch": 0.8641063515509602,
      "grad_norm": 0.2866942584514618,
      "learning_rate": 0.00014248060105924375,
      "loss": 0.293,
      "step": 2340
    },
    {
      "epoch": 0.8644756277695717,
      "grad_norm": 0.24580956995487213,
      "learning_rate": 0.00014245596748368026,
      "loss": 0.3142,
      "step": 2341
    },
    {
      "epoch": 0.8648449039881831,
      "grad_norm": 0.2784372568130493,
      "learning_rate": 0.00014243133390811675,
      "loss": 0.3346,
      "step": 2342
    },
    {
      "epoch": 0.8652141802067946,
      "grad_norm": 0.34338557720184326,
      "learning_rate": 0.00014240670033255327,
      "loss": 0.3088,
      "step": 2343
    },
    {
      "epoch": 0.8655834564254062,
      "grad_norm": 0.25998613238334656,
      "learning_rate": 0.00014238206675698978,
      "loss": 0.2599,
      "step": 2344
    },
    {
      "epoch": 0.8659527326440177,
      "grad_norm": 0.22598931193351746,
      "learning_rate": 0.0001423574331814263,
      "loss": 0.2598,
      "step": 2345
    },
    {
      "epoch": 0.8663220088626292,
      "grad_norm": 0.2820592224597931,
      "learning_rate": 0.00014233279960586278,
      "loss": 0.3211,
      "step": 2346
    },
    {
      "epoch": 0.8666912850812407,
      "grad_norm": 0.37511417269706726,
      "learning_rate": 0.0001423081660302993,
      "loss": 0.2871,
      "step": 2347
    },
    {
      "epoch": 0.8670605612998523,
      "grad_norm": 0.24090541899204254,
      "learning_rate": 0.0001422835324547358,
      "loss": 0.3016,
      "step": 2348
    },
    {
      "epoch": 0.8674298375184638,
      "grad_norm": 0.2671396732330322,
      "learning_rate": 0.00014225889887917233,
      "loss": 0.3033,
      "step": 2349
    },
    {
      "epoch": 0.8677991137370753,
      "grad_norm": 0.2758866846561432,
      "learning_rate": 0.00014223426530360881,
      "loss": 0.2655,
      "step": 2350
    },
    {
      "epoch": 0.8677991137370753,
      "eval_loss": 0.28671786189079285,
      "eval_runtime": 5.8659,
      "eval_samples_per_second": 8.524,
      "eval_steps_per_second": 1.193,
      "step": 2350
    },
    {
      "epoch": 0.8681683899556869,
      "grad_norm": 0.22430068254470825,
      "learning_rate": 0.00014220963172804533,
      "loss": 0.2786,
      "step": 2351
    },
    {
      "epoch": 0.8685376661742984,
      "grad_norm": 0.2884713411331177,
      "learning_rate": 0.00014218499815248182,
      "loss": 0.2749,
      "step": 2352
    },
    {
      "epoch": 0.8689069423929099,
      "grad_norm": 0.27482378482818604,
      "learning_rate": 0.00014216036457691836,
      "loss": 0.3344,
      "step": 2353
    },
    {
      "epoch": 0.8692762186115214,
      "grad_norm": 0.3267439603805542,
      "learning_rate": 0.00014213573100135485,
      "loss": 0.3404,
      "step": 2354
    },
    {
      "epoch": 0.869645494830133,
      "grad_norm": 0.261015385389328,
      "learning_rate": 0.00014211109742579136,
      "loss": 0.2585,
      "step": 2355
    },
    {
      "epoch": 0.8700147710487445,
      "grad_norm": 0.28181374073028564,
      "learning_rate": 0.00014208646385022785,
      "loss": 0.2542,
      "step": 2356
    },
    {
      "epoch": 0.870384047267356,
      "grad_norm": 0.2474304735660553,
      "learning_rate": 0.00014206183027466436,
      "loss": 0.2562,
      "step": 2357
    },
    {
      "epoch": 0.8707533234859675,
      "grad_norm": 0.39997008442878723,
      "learning_rate": 0.00014203719669910088,
      "loss": 0.2984,
      "step": 2358
    },
    {
      "epoch": 0.8711225997045791,
      "grad_norm": 0.23000465333461761,
      "learning_rate": 0.0001420125631235374,
      "loss": 0.2343,
      "step": 2359
    },
    {
      "epoch": 0.8714918759231906,
      "grad_norm": 0.2604723274707794,
      "learning_rate": 0.00014198792954797388,
      "loss": 0.2993,
      "step": 2360
    },
    {
      "epoch": 0.871861152141802,
      "grad_norm": 0.21523945033550262,
      "learning_rate": 0.0001419632959724104,
      "loss": 0.2407,
      "step": 2361
    },
    {
      "epoch": 0.8722304283604135,
      "grad_norm": 0.2766049802303314,
      "learning_rate": 0.0001419386623968469,
      "loss": 0.284,
      "step": 2362
    },
    {
      "epoch": 0.8725997045790251,
      "grad_norm": 0.2615562081336975,
      "learning_rate": 0.00014191402882128342,
      "loss": 0.2522,
      "step": 2363
    },
    {
      "epoch": 0.8729689807976366,
      "grad_norm": 0.258593887090683,
      "learning_rate": 0.0001418893952457199,
      "loss": 0.2703,
      "step": 2364
    },
    {
      "epoch": 0.8733382570162481,
      "grad_norm": 0.2718462646007538,
      "learning_rate": 0.00014186476167015643,
      "loss": 0.2821,
      "step": 2365
    },
    {
      "epoch": 0.8737075332348597,
      "grad_norm": 0.2711513638496399,
      "learning_rate": 0.00014184012809459294,
      "loss": 0.2737,
      "step": 2366
    },
    {
      "epoch": 0.8740768094534712,
      "grad_norm": 0.25952938199043274,
      "learning_rate": 0.00014181549451902946,
      "loss": 0.2519,
      "step": 2367
    },
    {
      "epoch": 0.8744460856720827,
      "grad_norm": 0.24934151768684387,
      "learning_rate": 0.00014179086094346594,
      "loss": 0.2908,
      "step": 2368
    },
    {
      "epoch": 0.8748153618906942,
      "grad_norm": 0.2977253794670105,
      "learning_rate": 0.00014176622736790246,
      "loss": 0.3269,
      "step": 2369
    },
    {
      "epoch": 0.8751846381093058,
      "grad_norm": 0.24035468697547913,
      "learning_rate": 0.00014174159379233894,
      "loss": 0.2403,
      "step": 2370
    },
    {
      "epoch": 0.8755539143279173,
      "grad_norm": 0.3673158884048462,
      "learning_rate": 0.0001417169602167755,
      "loss": 0.3038,
      "step": 2371
    },
    {
      "epoch": 0.8759231905465288,
      "grad_norm": 0.2450585961341858,
      "learning_rate": 0.00014169232664121197,
      "loss": 0.3046,
      "step": 2372
    },
    {
      "epoch": 0.8762924667651403,
      "grad_norm": 0.2410673350095749,
      "learning_rate": 0.0001416676930656485,
      "loss": 0.2426,
      "step": 2373
    },
    {
      "epoch": 0.8766617429837519,
      "grad_norm": 0.3130303621292114,
      "learning_rate": 0.00014164305949008498,
      "loss": 0.3167,
      "step": 2374
    },
    {
      "epoch": 0.8770310192023634,
      "grad_norm": 0.3219814598560333,
      "learning_rate": 0.0001416184259145215,
      "loss": 0.341,
      "step": 2375
    },
    {
      "epoch": 0.8774002954209749,
      "grad_norm": 0.28468939661979675,
      "learning_rate": 0.000141593792338958,
      "loss": 0.334,
      "step": 2376
    },
    {
      "epoch": 0.8777695716395865,
      "grad_norm": 0.42089036107063293,
      "learning_rate": 0.00014156915876339452,
      "loss": 0.3025,
      "step": 2377
    },
    {
      "epoch": 0.878138847858198,
      "grad_norm": 0.26263752579689026,
      "learning_rate": 0.000141544525187831,
      "loss": 0.2553,
      "step": 2378
    },
    {
      "epoch": 0.8785081240768094,
      "grad_norm": 0.29241836071014404,
      "learning_rate": 0.00014151989161226752,
      "loss": 0.3417,
      "step": 2379
    },
    {
      "epoch": 0.8788774002954209,
      "grad_norm": 0.28693094849586487,
      "learning_rate": 0.00014149525803670404,
      "loss": 0.2685,
      "step": 2380
    },
    {
      "epoch": 0.8792466765140325,
      "grad_norm": 0.2222106158733368,
      "learning_rate": 0.00014147062446114055,
      "loss": 0.2658,
      "step": 2381
    },
    {
      "epoch": 0.879615952732644,
      "grad_norm": 0.32580694556236267,
      "learning_rate": 0.00014144599088557704,
      "loss": 0.3423,
      "step": 2382
    },
    {
      "epoch": 0.8799852289512555,
      "grad_norm": 0.32633674144744873,
      "learning_rate": 0.00014142135731001355,
      "loss": 0.2965,
      "step": 2383
    },
    {
      "epoch": 0.880354505169867,
      "grad_norm": 0.25918981432914734,
      "learning_rate": 0.00014139672373445004,
      "loss": 0.3084,
      "step": 2384
    },
    {
      "epoch": 0.8807237813884786,
      "grad_norm": 0.30258896946907043,
      "learning_rate": 0.00014137209015888658,
      "loss": 0.276,
      "step": 2385
    },
    {
      "epoch": 0.8810930576070901,
      "grad_norm": 0.2822190523147583,
      "learning_rate": 0.00014134745658332307,
      "loss": 0.2632,
      "step": 2386
    },
    {
      "epoch": 0.8814623338257016,
      "grad_norm": 0.27233344316482544,
      "learning_rate": 0.00014132282300775959,
      "loss": 0.3198,
      "step": 2387
    },
    {
      "epoch": 0.8818316100443131,
      "grad_norm": 0.31817078590393066,
      "learning_rate": 0.00014129818943219607,
      "loss": 0.2661,
      "step": 2388
    },
    {
      "epoch": 0.8822008862629247,
      "grad_norm": 0.22350935637950897,
      "learning_rate": 0.0001412735558566326,
      "loss": 0.2517,
      "step": 2389
    },
    {
      "epoch": 0.8825701624815362,
      "grad_norm": 0.32207536697387695,
      "learning_rate": 0.0001412489222810691,
      "loss": 0.3102,
      "step": 2390
    },
    {
      "epoch": 0.8829394387001477,
      "grad_norm": 0.327226459980011,
      "learning_rate": 0.00014122428870550562,
      "loss": 0.3549,
      "step": 2391
    },
    {
      "epoch": 0.8833087149187593,
      "grad_norm": 0.30438438057899475,
      "learning_rate": 0.0001411996551299421,
      "loss": 0.2674,
      "step": 2392
    },
    {
      "epoch": 0.8836779911373708,
      "grad_norm": 0.27010953426361084,
      "learning_rate": 0.00014117502155437862,
      "loss": 0.2689,
      "step": 2393
    },
    {
      "epoch": 0.8840472673559823,
      "grad_norm": 0.29276734590530396,
      "learning_rate": 0.00014115038797881513,
      "loss": 0.2905,
      "step": 2394
    },
    {
      "epoch": 0.8844165435745938,
      "grad_norm": 0.23772290349006653,
      "learning_rate": 0.00014112575440325165,
      "loss": 0.2877,
      "step": 2395
    },
    {
      "epoch": 0.8847858197932054,
      "grad_norm": 0.29832708835601807,
      "learning_rate": 0.00014110112082768814,
      "loss": 0.3108,
      "step": 2396
    },
    {
      "epoch": 0.8851550960118169,
      "grad_norm": 0.2792957127094269,
      "learning_rate": 0.00014107648725212465,
      "loss": 0.2976,
      "step": 2397
    },
    {
      "epoch": 0.8855243722304283,
      "grad_norm": 0.3596116006374359,
      "learning_rate": 0.00014105185367656116,
      "loss": 0.3909,
      "step": 2398
    },
    {
      "epoch": 0.8858936484490398,
      "grad_norm": 0.30374330282211304,
      "learning_rate": 0.00014102722010099768,
      "loss": 0.3588,
      "step": 2399
    },
    {
      "epoch": 0.8862629246676514,
      "grad_norm": 0.25111934542655945,
      "learning_rate": 0.00014100258652543417,
      "loss": 0.2565,
      "step": 2400
    },
    {
      "epoch": 0.8862629246676514,
      "eval_loss": 0.28929802775382996,
      "eval_runtime": 5.8529,
      "eval_samples_per_second": 8.543,
      "eval_steps_per_second": 1.196,
      "step": 2400
    },
    {
      "epoch": 0.8866322008862629,
      "grad_norm": 0.29126715660095215,
      "learning_rate": 0.00014097795294987068,
      "loss": 0.3392,
      "step": 2401
    },
    {
      "epoch": 0.8870014771048744,
      "grad_norm": 0.2631511688232422,
      "learning_rate": 0.00014095331937430717,
      "loss": 0.2636,
      "step": 2402
    },
    {
      "epoch": 0.8873707533234859,
      "grad_norm": 0.27585646510124207,
      "learning_rate": 0.0001409286857987437,
      "loss": 0.2639,
      "step": 2403
    },
    {
      "epoch": 0.8877400295420975,
      "grad_norm": 0.32870951294898987,
      "learning_rate": 0.0001409040522231802,
      "loss": 0.3372,
      "step": 2404
    },
    {
      "epoch": 0.888109305760709,
      "grad_norm": 0.3076072037220001,
      "learning_rate": 0.0001408794186476167,
      "loss": 0.3156,
      "step": 2405
    },
    {
      "epoch": 0.8884785819793205,
      "grad_norm": 0.25015804171562195,
      "learning_rate": 0.0001408547850720532,
      "loss": 0.2378,
      "step": 2406
    },
    {
      "epoch": 0.8888478581979321,
      "grad_norm": 0.2584746479988098,
      "learning_rate": 0.00014083015149648972,
      "loss": 0.2819,
      "step": 2407
    },
    {
      "epoch": 0.8892171344165436,
      "grad_norm": 0.23811596632003784,
      "learning_rate": 0.00014080551792092623,
      "loss": 0.2912,
      "step": 2408
    },
    {
      "epoch": 0.8895864106351551,
      "grad_norm": 0.28469327092170715,
      "learning_rate": 0.00014078088434536274,
      "loss": 0.2826,
      "step": 2409
    },
    {
      "epoch": 0.8899556868537666,
      "grad_norm": 0.30144456028938293,
      "learning_rate": 0.00014075625076979923,
      "loss": 0.2995,
      "step": 2410
    },
    {
      "epoch": 0.8903249630723782,
      "grad_norm": 0.3043076992034912,
      "learning_rate": 0.00014073161719423575,
      "loss": 0.2927,
      "step": 2411
    },
    {
      "epoch": 0.8906942392909897,
      "grad_norm": 0.2810080051422119,
      "learning_rate": 0.00014070698361867226,
      "loss": 0.2659,
      "step": 2412
    },
    {
      "epoch": 0.8910635155096012,
      "grad_norm": 0.23271699249744415,
      "learning_rate": 0.00014068235004310878,
      "loss": 0.2476,
      "step": 2413
    },
    {
      "epoch": 0.8914327917282127,
      "grad_norm": 0.36389675736427307,
      "learning_rate": 0.00014065771646754526,
      "loss": 0.3012,
      "step": 2414
    },
    {
      "epoch": 0.8918020679468243,
      "grad_norm": 0.2137039452791214,
      "learning_rate": 0.00014063308289198178,
      "loss": 0.2447,
      "step": 2415
    },
    {
      "epoch": 0.8921713441654358,
      "grad_norm": 0.24990826845169067,
      "learning_rate": 0.00014060844931641827,
      "loss": 0.2819,
      "step": 2416
    },
    {
      "epoch": 0.8925406203840472,
      "grad_norm": 0.2792392075061798,
      "learning_rate": 0.0001405838157408548,
      "loss": 0.3149,
      "step": 2417
    },
    {
      "epoch": 0.8929098966026587,
      "grad_norm": 0.2583014965057373,
      "learning_rate": 0.0001405591821652913,
      "loss": 0.2634,
      "step": 2418
    },
    {
      "epoch": 0.8932791728212703,
      "grad_norm": 0.25006797909736633,
      "learning_rate": 0.0001405345485897278,
      "loss": 0.2905,
      "step": 2419
    },
    {
      "epoch": 0.8936484490398818,
      "grad_norm": 0.31017082929611206,
      "learning_rate": 0.0001405099150141643,
      "loss": 0.3216,
      "step": 2420
    },
    {
      "epoch": 0.8940177252584933,
      "grad_norm": 0.28957879543304443,
      "learning_rate": 0.0001404852814386008,
      "loss": 0.2999,
      "step": 2421
    },
    {
      "epoch": 0.8943870014771049,
      "grad_norm": 0.25751760601997375,
      "learning_rate": 0.00014046064786303733,
      "loss": 0.2544,
      "step": 2422
    },
    {
      "epoch": 0.8947562776957164,
      "grad_norm": 0.2919774353504181,
      "learning_rate": 0.00014043601428747384,
      "loss": 0.2958,
      "step": 2423
    },
    {
      "epoch": 0.8951255539143279,
      "grad_norm": 0.24311396479606628,
      "learning_rate": 0.00014041138071191033,
      "loss": 0.2683,
      "step": 2424
    },
    {
      "epoch": 0.8954948301329394,
      "grad_norm": 0.2541423439979553,
      "learning_rate": 0.00014038674713634684,
      "loss": 0.2942,
      "step": 2425
    },
    {
      "epoch": 0.895864106351551,
      "grad_norm": 0.21922168135643005,
      "learning_rate": 0.00014036211356078336,
      "loss": 0.2335,
      "step": 2426
    },
    {
      "epoch": 0.8962333825701625,
      "grad_norm": 0.24970941245555878,
      "learning_rate": 0.00014033747998521987,
      "loss": 0.2468,
      "step": 2427
    },
    {
      "epoch": 0.896602658788774,
      "grad_norm": 0.27785637974739075,
      "learning_rate": 0.00014031284640965636,
      "loss": 0.3133,
      "step": 2428
    },
    {
      "epoch": 0.8969719350073855,
      "grad_norm": 0.4019975960254669,
      "learning_rate": 0.00014028821283409287,
      "loss": 0.3573,
      "step": 2429
    },
    {
      "epoch": 0.8973412112259971,
      "grad_norm": 0.23545053601264954,
      "learning_rate": 0.0001402635792585294,
      "loss": 0.272,
      "step": 2430
    },
    {
      "epoch": 0.8977104874446086,
      "grad_norm": 0.29188770055770874,
      "learning_rate": 0.0001402389456829659,
      "loss": 0.2965,
      "step": 2431
    },
    {
      "epoch": 0.8980797636632201,
      "grad_norm": 0.29059723019599915,
      "learning_rate": 0.0001402143121074024,
      "loss": 0.2701,
      "step": 2432
    },
    {
      "epoch": 0.8984490398818316,
      "grad_norm": 0.2653946578502655,
      "learning_rate": 0.0001401896785318389,
      "loss": 0.299,
      "step": 2433
    },
    {
      "epoch": 0.8988183161004432,
      "grad_norm": 0.22633157670497894,
      "learning_rate": 0.0001401650449562754,
      "loss": 0.2622,
      "step": 2434
    },
    {
      "epoch": 0.8991875923190547,
      "grad_norm": 0.2481098473072052,
      "learning_rate": 0.00014014041138071194,
      "loss": 0.2414,
      "step": 2435
    },
    {
      "epoch": 0.8995568685376661,
      "grad_norm": 0.26378968358039856,
      "learning_rate": 0.00014011577780514842,
      "loss": 0.2943,
      "step": 2436
    },
    {
      "epoch": 0.8999261447562777,
      "grad_norm": 0.26425355672836304,
      "learning_rate": 0.00014009114422958494,
      "loss": 0.2669,
      "step": 2437
    },
    {
      "epoch": 0.9002954209748892,
      "grad_norm": 0.31967341899871826,
      "learning_rate": 0.00014006651065402143,
      "loss": 0.3514,
      "step": 2438
    },
    {
      "epoch": 0.9006646971935007,
      "grad_norm": 0.22327451407909393,
      "learning_rate": 0.00014004187707845794,
      "loss": 0.2261,
      "step": 2439
    },
    {
      "epoch": 0.9010339734121122,
      "grad_norm": 0.2849748432636261,
      "learning_rate": 0.00014001724350289445,
      "loss": 0.2701,
      "step": 2440
    },
    {
      "epoch": 0.9014032496307238,
      "grad_norm": 0.37810251116752625,
      "learning_rate": 0.00013999260992733097,
      "loss": 0.3405,
      "step": 2441
    },
    {
      "epoch": 0.9017725258493353,
      "grad_norm": 0.23618659377098083,
      "learning_rate": 0.00013996797635176746,
      "loss": 0.2422,
      "step": 2442
    },
    {
      "epoch": 0.9021418020679468,
      "grad_norm": 0.3084317445755005,
      "learning_rate": 0.00013994334277620397,
      "loss": 0.3264,
      "step": 2443
    },
    {
      "epoch": 0.9025110782865583,
      "grad_norm": 0.31723859906196594,
      "learning_rate": 0.00013991870920064049,
      "loss": 0.3861,
      "step": 2444
    },
    {
      "epoch": 0.9028803545051699,
      "grad_norm": 0.281729519367218,
      "learning_rate": 0.000139894075625077,
      "loss": 0.3022,
      "step": 2445
    },
    {
      "epoch": 0.9032496307237814,
      "grad_norm": 0.28756844997406006,
      "learning_rate": 0.0001398694420495135,
      "loss": 0.2643,
      "step": 2446
    },
    {
      "epoch": 0.9036189069423929,
      "grad_norm": 0.3584077060222626,
      "learning_rate": 0.00013984480847395,
      "loss": 0.3533,
      "step": 2447
    },
    {
      "epoch": 0.9039881831610044,
      "grad_norm": 0.258378267288208,
      "learning_rate": 0.0001398201748983865,
      "loss": 0.3143,
      "step": 2448
    },
    {
      "epoch": 0.904357459379616,
      "grad_norm": 0.25656944513320923,
      "learning_rate": 0.00013979554132282303,
      "loss": 0.2575,
      "step": 2449
    },
    {
      "epoch": 0.9047267355982275,
      "grad_norm": 0.25176844000816345,
      "learning_rate": 0.00013977090774725952,
      "loss": 0.3463,
      "step": 2450
    },
    {
      "epoch": 0.9047267355982275,
      "eval_loss": 0.2817683219909668,
      "eval_runtime": 5.8463,
      "eval_samples_per_second": 8.552,
      "eval_steps_per_second": 1.197,
      "step": 2450
    },
    {
      "epoch": 0.905096011816839,
      "grad_norm": 0.3466639816761017,
      "learning_rate": 0.00013974627417169603,
      "loss": 0.3071,
      "step": 2451
    },
    {
      "epoch": 0.9054652880354506,
      "grad_norm": 0.2617734670639038,
      "learning_rate": 0.00013972164059613252,
      "loss": 0.33,
      "step": 2452
    },
    {
      "epoch": 0.9058345642540621,
      "grad_norm": 0.24130357801914215,
      "learning_rate": 0.00013969700702056904,
      "loss": 0.2765,
      "step": 2453
    },
    {
      "epoch": 0.9062038404726735,
      "grad_norm": 0.37545138597488403,
      "learning_rate": 0.00013967237344500555,
      "loss": 0.3411,
      "step": 2454
    },
    {
      "epoch": 0.906573116691285,
      "grad_norm": 0.23208720982074738,
      "learning_rate": 0.00013964773986944207,
      "loss": 0.234,
      "step": 2455
    },
    {
      "epoch": 0.9069423929098966,
      "grad_norm": 0.2672305405139923,
      "learning_rate": 0.00013962310629387855,
      "loss": 0.3081,
      "step": 2456
    },
    {
      "epoch": 0.9073116691285081,
      "grad_norm": 0.23274806141853333,
      "learning_rate": 0.00013959847271831507,
      "loss": 0.2658,
      "step": 2457
    },
    {
      "epoch": 0.9076809453471196,
      "grad_norm": 0.3449879288673401,
      "learning_rate": 0.00013957383914275158,
      "loss": 0.3397,
      "step": 2458
    },
    {
      "epoch": 0.9080502215657311,
      "grad_norm": 0.30551835894584656,
      "learning_rate": 0.0001395492055671881,
      "loss": 0.3049,
      "step": 2459
    },
    {
      "epoch": 0.9084194977843427,
      "grad_norm": 0.2604008913040161,
      "learning_rate": 0.00013952457199162458,
      "loss": 0.273,
      "step": 2460
    },
    {
      "epoch": 0.9087887740029542,
      "grad_norm": 0.31938475370407104,
      "learning_rate": 0.0001394999384160611,
      "loss": 0.3564,
      "step": 2461
    },
    {
      "epoch": 0.9091580502215657,
      "grad_norm": 0.24833115935325623,
      "learning_rate": 0.0001394753048404976,
      "loss": 0.2765,
      "step": 2462
    },
    {
      "epoch": 0.9095273264401772,
      "grad_norm": 0.26162466406822205,
      "learning_rate": 0.00013945067126493413,
      "loss": 0.3201,
      "step": 2463
    },
    {
      "epoch": 0.9098966026587888,
      "grad_norm": 0.3023029565811157,
      "learning_rate": 0.00013942603768937062,
      "loss": 0.2832,
      "step": 2464
    },
    {
      "epoch": 0.9102658788774003,
      "grad_norm": 0.3238774836063385,
      "learning_rate": 0.00013940140411380713,
      "loss": 0.2566,
      "step": 2465
    },
    {
      "epoch": 0.9106351550960118,
      "grad_norm": 0.3158404529094696,
      "learning_rate": 0.00013937677053824362,
      "loss": 0.4155,
      "step": 2466
    },
    {
      "epoch": 0.9110044313146234,
      "grad_norm": 0.3102486729621887,
      "learning_rate": 0.00013935213696268013,
      "loss": 0.3817,
      "step": 2467
    },
    {
      "epoch": 0.9113737075332349,
      "grad_norm": 0.2413311004638672,
      "learning_rate": 0.00013932750338711665,
      "loss": 0.246,
      "step": 2468
    },
    {
      "epoch": 0.9117429837518464,
      "grad_norm": 0.28252607583999634,
      "learning_rate": 0.00013930286981155316,
      "loss": 0.2975,
      "step": 2469
    },
    {
      "epoch": 0.9121122599704579,
      "grad_norm": 0.264702707529068,
      "learning_rate": 0.00013927823623598965,
      "loss": 0.31,
      "step": 2470
    },
    {
      "epoch": 0.9124815361890695,
      "grad_norm": 0.31399446725845337,
      "learning_rate": 0.00013925360266042616,
      "loss": 0.2865,
      "step": 2471
    },
    {
      "epoch": 0.912850812407681,
      "grad_norm": 0.337719589471817,
      "learning_rate": 0.00013922896908486268,
      "loss": 0.3609,
      "step": 2472
    },
    {
      "epoch": 0.9132200886262924,
      "grad_norm": 0.3153814673423767,
      "learning_rate": 0.0001392043355092992,
      "loss": 0.3028,
      "step": 2473
    },
    {
      "epoch": 0.9135893648449039,
      "grad_norm": 0.30539634823799133,
      "learning_rate": 0.00013917970193373568,
      "loss": 0.3257,
      "step": 2474
    },
    {
      "epoch": 0.9139586410635155,
      "grad_norm": 0.25480708479881287,
      "learning_rate": 0.0001391550683581722,
      "loss": 0.3038,
      "step": 2475
    },
    {
      "epoch": 0.914327917282127,
      "grad_norm": 0.2393716275691986,
      "learning_rate": 0.0001391304347826087,
      "loss": 0.2296,
      "step": 2476
    },
    {
      "epoch": 0.9146971935007385,
      "grad_norm": 0.245378315448761,
      "learning_rate": 0.00013910580120704523,
      "loss": 0.2824,
      "step": 2477
    },
    {
      "epoch": 0.9150664697193501,
      "grad_norm": 0.3395783603191376,
      "learning_rate": 0.0001390811676314817,
      "loss": 0.4217,
      "step": 2478
    },
    {
      "epoch": 0.9154357459379616,
      "grad_norm": 0.25693923234939575,
      "learning_rate": 0.00013905653405591823,
      "loss": 0.3075,
      "step": 2479
    },
    {
      "epoch": 0.9158050221565731,
      "grad_norm": 0.2574548125267029,
      "learning_rate": 0.00013903190048035471,
      "loss": 0.2576,
      "step": 2480
    },
    {
      "epoch": 0.9161742983751846,
      "grad_norm": 0.22771596908569336,
      "learning_rate": 0.00013900726690479126,
      "loss": 0.2517,
      "step": 2481
    },
    {
      "epoch": 0.9165435745937962,
      "grad_norm": 0.27070626616477966,
      "learning_rate": 0.00013898263332922774,
      "loss": 0.3002,
      "step": 2482
    },
    {
      "epoch": 0.9169128508124077,
      "grad_norm": 0.2886691093444824,
      "learning_rate": 0.00013895799975366426,
      "loss": 0.2792,
      "step": 2483
    },
    {
      "epoch": 0.9172821270310192,
      "grad_norm": 0.2978097200393677,
      "learning_rate": 0.00013893336617810075,
      "loss": 0.3101,
      "step": 2484
    },
    {
      "epoch": 0.9176514032496307,
      "grad_norm": 0.33635851740837097,
      "learning_rate": 0.00013890873260253726,
      "loss": 0.3293,
      "step": 2485
    },
    {
      "epoch": 0.9180206794682423,
      "grad_norm": 0.31264153122901917,
      "learning_rate": 0.00013888409902697378,
      "loss": 0.3546,
      "step": 2486
    },
    {
      "epoch": 0.9183899556868538,
      "grad_norm": 0.25478553771972656,
      "learning_rate": 0.0001388594654514103,
      "loss": 0.2992,
      "step": 2487
    },
    {
      "epoch": 0.9187592319054653,
      "grad_norm": 0.23154157400131226,
      "learning_rate": 0.00013883483187584678,
      "loss": 0.2276,
      "step": 2488
    },
    {
      "epoch": 0.9191285081240768,
      "grad_norm": 0.2702298164367676,
      "learning_rate": 0.0001388101983002833,
      "loss": 0.3144,
      "step": 2489
    },
    {
      "epoch": 0.9194977843426884,
      "grad_norm": 0.2805127203464508,
      "learning_rate": 0.0001387855647247198,
      "loss": 0.3173,
      "step": 2490
    },
    {
      "epoch": 0.9198670605612999,
      "grad_norm": 0.3024933934211731,
      "learning_rate": 0.00013876093114915632,
      "loss": 0.2916,
      "step": 2491
    },
    {
      "epoch": 0.9202363367799113,
      "grad_norm": 0.24952004849910736,
      "learning_rate": 0.0001387362975735928,
      "loss": 0.2645,
      "step": 2492
    },
    {
      "epoch": 0.920605612998523,
      "grad_norm": 0.34727632999420166,
      "learning_rate": 0.00013871166399802932,
      "loss": 0.3491,
      "step": 2493
    },
    {
      "epoch": 0.9209748892171344,
      "grad_norm": 0.3407987058162689,
      "learning_rate": 0.0001386870304224658,
      "loss": 0.3503,
      "step": 2494
    },
    {
      "epoch": 0.9213441654357459,
      "grad_norm": 0.2808375954627991,
      "learning_rate": 0.00013866239684690235,
      "loss": 0.3076,
      "step": 2495
    },
    {
      "epoch": 0.9217134416543574,
      "grad_norm": 0.28790482878685,
      "learning_rate": 0.00013863776327133884,
      "loss": 0.313,
      "step": 2496
    },
    {
      "epoch": 0.922082717872969,
      "grad_norm": 0.24627193808555603,
      "learning_rate": 0.00013861312969577536,
      "loss": 0.2657,
      "step": 2497
    },
    {
      "epoch": 0.9224519940915805,
      "grad_norm": 0.3345617651939392,
      "learning_rate": 0.00013858849612021184,
      "loss": 0.323,
      "step": 2498
    },
    {
      "epoch": 0.922821270310192,
      "grad_norm": 0.28228136897087097,
      "learning_rate": 0.00013856386254464836,
      "loss": 0.3108,
      "step": 2499
    },
    {
      "epoch": 0.9231905465288035,
      "grad_norm": 0.23251014947891235,
      "learning_rate": 0.00013853922896908487,
      "loss": 0.2689,
      "step": 2500
    },
    {
      "epoch": 0.9231905465288035,
      "eval_loss": 0.2845548391342163,
      "eval_runtime": 5.8619,
      "eval_samples_per_second": 8.53,
      "eval_steps_per_second": 1.194,
      "step": 2500
    },
    {
      "epoch": 0.9235598227474151,
      "grad_norm": 0.33126166462898254,
      "learning_rate": 0.0001385145953935214,
      "loss": 0.3824,
      "step": 2501
    },
    {
      "epoch": 0.9239290989660266,
      "grad_norm": 0.2767648994922638,
      "learning_rate": 0.00013848996181795787,
      "loss": 0.3169,
      "step": 2502
    },
    {
      "epoch": 0.9242983751846381,
      "grad_norm": 0.2855761647224426,
      "learning_rate": 0.0001384653282423944,
      "loss": 0.3259,
      "step": 2503
    },
    {
      "epoch": 0.9246676514032496,
      "grad_norm": 0.22694745659828186,
      "learning_rate": 0.0001384406946668309,
      "loss": 0.258,
      "step": 2504
    },
    {
      "epoch": 0.9250369276218612,
      "grad_norm": 0.27971693873405457,
      "learning_rate": 0.00013841606109126742,
      "loss": 0.3251,
      "step": 2505
    },
    {
      "epoch": 0.9254062038404727,
      "grad_norm": 0.21657156944274902,
      "learning_rate": 0.0001383914275157039,
      "loss": 0.272,
      "step": 2506
    },
    {
      "epoch": 0.9257754800590842,
      "grad_norm": 0.2831554114818573,
      "learning_rate": 0.00013836679394014042,
      "loss": 0.2946,
      "step": 2507
    },
    {
      "epoch": 0.9261447562776958,
      "grad_norm": 0.258457213640213,
      "learning_rate": 0.00013834216036457694,
      "loss": 0.2844,
      "step": 2508
    },
    {
      "epoch": 0.9265140324963073,
      "grad_norm": 0.25451990962028503,
      "learning_rate": 0.00013831752678901345,
      "loss": 0.2208,
      "step": 2509
    },
    {
      "epoch": 0.9268833087149188,
      "grad_norm": 0.29237785935401917,
      "learning_rate": 0.00013829289321344994,
      "loss": 0.3336,
      "step": 2510
    },
    {
      "epoch": 0.9272525849335302,
      "grad_norm": 0.3132020831108093,
      "learning_rate": 0.00013826825963788645,
      "loss": 0.2769,
      "step": 2511
    },
    {
      "epoch": 0.9276218611521418,
      "grad_norm": 0.3423916697502136,
      "learning_rate": 0.00013824362606232294,
      "loss": 0.3043,
      "step": 2512
    },
    {
      "epoch": 0.9279911373707533,
      "grad_norm": 0.2983170449733734,
      "learning_rate": 0.00013821899248675948,
      "loss": 0.3227,
      "step": 2513
    },
    {
      "epoch": 0.9283604135893648,
      "grad_norm": 0.23354722559452057,
      "learning_rate": 0.00013819435891119597,
      "loss": 0.2706,
      "step": 2514
    },
    {
      "epoch": 0.9287296898079763,
      "grad_norm": 0.30000245571136475,
      "learning_rate": 0.00013816972533563248,
      "loss": 0.3278,
      "step": 2515
    },
    {
      "epoch": 0.9290989660265879,
      "grad_norm": 0.3025997579097748,
      "learning_rate": 0.00013814509176006897,
      "loss": 0.284,
      "step": 2516
    },
    {
      "epoch": 0.9294682422451994,
      "grad_norm": 0.24907605350017548,
      "learning_rate": 0.00013812045818450549,
      "loss": 0.2629,
      "step": 2517
    },
    {
      "epoch": 0.9298375184638109,
      "grad_norm": 0.29721975326538086,
      "learning_rate": 0.000138095824608942,
      "loss": 0.2942,
      "step": 2518
    },
    {
      "epoch": 0.9302067946824224,
      "grad_norm": 0.31587401032447815,
      "learning_rate": 0.00013807119103337851,
      "loss": 0.251,
      "step": 2519
    },
    {
      "epoch": 0.930576070901034,
      "grad_norm": 0.2655812203884125,
      "learning_rate": 0.000138046557457815,
      "loss": 0.2574,
      "step": 2520
    },
    {
      "epoch": 0.9309453471196455,
      "grad_norm": 0.2984577715396881,
      "learning_rate": 0.00013802192388225152,
      "loss": 0.3091,
      "step": 2521
    },
    {
      "epoch": 0.931314623338257,
      "grad_norm": 0.280851274728775,
      "learning_rate": 0.00013799729030668803,
      "loss": 0.2661,
      "step": 2522
    },
    {
      "epoch": 0.9316838995568686,
      "grad_norm": 0.3139093220233917,
      "learning_rate": 0.00013797265673112455,
      "loss": 0.3495,
      "step": 2523
    },
    {
      "epoch": 0.9320531757754801,
      "grad_norm": 0.25780218839645386,
      "learning_rate": 0.00013794802315556103,
      "loss": 0.2793,
      "step": 2524
    },
    {
      "epoch": 0.9324224519940916,
      "grad_norm": 0.37408342957496643,
      "learning_rate": 0.00013792338957999755,
      "loss": 0.2843,
      "step": 2525
    },
    {
      "epoch": 0.9327917282127031,
      "grad_norm": 0.23762372136116028,
      "learning_rate": 0.00013789875600443404,
      "loss": 0.231,
      "step": 2526
    },
    {
      "epoch": 0.9331610044313147,
      "grad_norm": 0.2505086362361908,
      "learning_rate": 0.00013787412242887058,
      "loss": 0.2322,
      "step": 2527
    },
    {
      "epoch": 0.9335302806499262,
      "grad_norm": 0.2948438227176666,
      "learning_rate": 0.00013784948885330707,
      "loss": 0.3642,
      "step": 2528
    },
    {
      "epoch": 0.9338995568685377,
      "grad_norm": 0.3976789712905884,
      "learning_rate": 0.00013782485527774358,
      "loss": 0.3192,
      "step": 2529
    },
    {
      "epoch": 0.9342688330871491,
      "grad_norm": 0.2831787168979645,
      "learning_rate": 0.00013780022170218007,
      "loss": 0.2827,
      "step": 2530
    },
    {
      "epoch": 0.9346381093057607,
      "grad_norm": 0.26242488622665405,
      "learning_rate": 0.00013777558812661658,
      "loss": 0.272,
      "step": 2531
    },
    {
      "epoch": 0.9350073855243722,
      "grad_norm": 0.26361218094825745,
      "learning_rate": 0.0001377509545510531,
      "loss": 0.2967,
      "step": 2532
    },
    {
      "epoch": 0.9353766617429837,
      "grad_norm": 0.23245660960674286,
      "learning_rate": 0.0001377263209754896,
      "loss": 0.2221,
      "step": 2533
    },
    {
      "epoch": 0.9357459379615952,
      "grad_norm": 0.2824196517467499,
      "learning_rate": 0.0001377016873999261,
      "loss": 0.2967,
      "step": 2534
    },
    {
      "epoch": 0.9361152141802068,
      "grad_norm": 0.2880588471889496,
      "learning_rate": 0.0001376770538243626,
      "loss": 0.3025,
      "step": 2535
    },
    {
      "epoch": 0.9364844903988183,
      "grad_norm": 0.20747391879558563,
      "learning_rate": 0.00013765242024879913,
      "loss": 0.2493,
      "step": 2536
    },
    {
      "epoch": 0.9368537666174298,
      "grad_norm": 0.3134663701057434,
      "learning_rate": 0.00013762778667323564,
      "loss": 0.3315,
      "step": 2537
    },
    {
      "epoch": 0.9372230428360414,
      "grad_norm": 0.27924367785453796,
      "learning_rate": 0.00013760315309767213,
      "loss": 0.2743,
      "step": 2538
    },
    {
      "epoch": 0.9375923190546529,
      "grad_norm": 0.26552537083625793,
      "learning_rate": 0.00013757851952210864,
      "loss": 0.2625,
      "step": 2539
    },
    {
      "epoch": 0.9379615952732644,
      "grad_norm": 0.3043096959590912,
      "learning_rate": 0.00013755388594654516,
      "loss": 0.377,
      "step": 2540
    },
    {
      "epoch": 0.9383308714918759,
      "grad_norm": 0.26502060890197754,
      "learning_rate": 0.00013752925237098167,
      "loss": 0.3197,
      "step": 2541
    },
    {
      "epoch": 0.9387001477104875,
      "grad_norm": 0.2906153202056885,
      "learning_rate": 0.00013750461879541816,
      "loss": 0.2946,
      "step": 2542
    },
    {
      "epoch": 0.939069423929099,
      "grad_norm": 0.25271281599998474,
      "learning_rate": 0.00013747998521985468,
      "loss": 0.2851,
      "step": 2543
    },
    {
      "epoch": 0.9394387001477105,
      "grad_norm": 0.328204482793808,
      "learning_rate": 0.00013745535164429116,
      "loss": 0.3346,
      "step": 2544
    },
    {
      "epoch": 0.939807976366322,
      "grad_norm": 0.2681902050971985,
      "learning_rate": 0.0001374307180687277,
      "loss": 0.3299,
      "step": 2545
    },
    {
      "epoch": 0.9401772525849336,
      "grad_norm": 0.2553021311759949,
      "learning_rate": 0.0001374060844931642,
      "loss": 0.3179,
      "step": 2546
    },
    {
      "epoch": 0.9405465288035451,
      "grad_norm": 0.28603696823120117,
      "learning_rate": 0.0001373814509176007,
      "loss": 0.3275,
      "step": 2547
    },
    {
      "epoch": 0.9409158050221565,
      "grad_norm": 0.29382210969924927,
      "learning_rate": 0.0001373568173420372,
      "loss": 0.3132,
      "step": 2548
    },
    {
      "epoch": 0.941285081240768,
      "grad_norm": 0.29052960872650146,
      "learning_rate": 0.0001373321837664737,
      "loss": 0.324,
      "step": 2549
    },
    {
      "epoch": 0.9416543574593796,
      "grad_norm": 0.284537136554718,
      "learning_rate": 0.00013730755019091022,
      "loss": 0.3597,
      "step": 2550
    },
    {
      "epoch": 0.9416543574593796,
      "eval_loss": 0.2878875434398651,
      "eval_runtime": 5.8513,
      "eval_samples_per_second": 8.545,
      "eval_steps_per_second": 1.196,
      "step": 2550
    },
    {
      "epoch": 0.9420236336779911,
      "grad_norm": 0.2447899580001831,
      "learning_rate": 0.00013728291661534674,
      "loss": 0.244,
      "step": 2551
    },
    {
      "epoch": 0.9423929098966026,
      "grad_norm": 0.2734544277191162,
      "learning_rate": 0.00013725828303978323,
      "loss": 0.2923,
      "step": 2552
    },
    {
      "epoch": 0.9427621861152142,
      "grad_norm": 0.26823151111602783,
      "learning_rate": 0.00013723364946421974,
      "loss": 0.3097,
      "step": 2553
    },
    {
      "epoch": 0.9431314623338257,
      "grad_norm": 0.21306352317333221,
      "learning_rate": 0.00013720901588865626,
      "loss": 0.2454,
      "step": 2554
    },
    {
      "epoch": 0.9435007385524372,
      "grad_norm": 0.23448294401168823,
      "learning_rate": 0.00013718438231309277,
      "loss": 0.3058,
      "step": 2555
    },
    {
      "epoch": 0.9438700147710487,
      "grad_norm": 0.2050745040178299,
      "learning_rate": 0.00013715974873752926,
      "loss": 0.231,
      "step": 2556
    },
    {
      "epoch": 0.9442392909896603,
      "grad_norm": 0.28681740164756775,
      "learning_rate": 0.00013713511516196575,
      "loss": 0.32,
      "step": 2557
    },
    {
      "epoch": 0.9446085672082718,
      "grad_norm": 0.3256187438964844,
      "learning_rate": 0.00013711048158640226,
      "loss": 0.3114,
      "step": 2558
    },
    {
      "epoch": 0.9449778434268833,
      "grad_norm": 0.4193253517150879,
      "learning_rate": 0.00013708584801083878,
      "loss": 0.4179,
      "step": 2559
    },
    {
      "epoch": 0.9453471196454948,
      "grad_norm": 0.20358948409557343,
      "learning_rate": 0.0001370612144352753,
      "loss": 0.2017,
      "step": 2560
    },
    {
      "epoch": 0.9457163958641064,
      "grad_norm": 0.25791874527931213,
      "learning_rate": 0.00013703658085971178,
      "loss": 0.2399,
      "step": 2561
    },
    {
      "epoch": 0.9460856720827179,
      "grad_norm": 0.21454410254955292,
      "learning_rate": 0.0001370119472841483,
      "loss": 0.2495,
      "step": 2562
    },
    {
      "epoch": 0.9464549483013294,
      "grad_norm": 0.28488028049468994,
      "learning_rate": 0.0001369873137085848,
      "loss": 0.351,
      "step": 2563
    },
    {
      "epoch": 0.946824224519941,
      "grad_norm": 0.33706241846084595,
      "learning_rate": 0.00013696268013302132,
      "loss": 0.3406,
      "step": 2564
    },
    {
      "epoch": 0.9471935007385525,
      "grad_norm": 0.27496975660324097,
      "learning_rate": 0.0001369380465574578,
      "loss": 0.2703,
      "step": 2565
    },
    {
      "epoch": 0.947562776957164,
      "grad_norm": 0.28031080961227417,
      "learning_rate": 0.00013691341298189432,
      "loss": 0.2589,
      "step": 2566
    },
    {
      "epoch": 0.9479320531757754,
      "grad_norm": 0.3087734580039978,
      "learning_rate": 0.00013688877940633084,
      "loss": 0.3214,
      "step": 2567
    },
    {
      "epoch": 0.948301329394387,
      "grad_norm": 0.20123820006847382,
      "learning_rate": 0.00013686414583076735,
      "loss": 0.2379,
      "step": 2568
    },
    {
      "epoch": 0.9486706056129985,
      "grad_norm": 0.26751509308815,
      "learning_rate": 0.00013683951225520384,
      "loss": 0.3167,
      "step": 2569
    },
    {
      "epoch": 0.94903988183161,
      "grad_norm": 0.35837164521217346,
      "learning_rate": 0.00013681487867964035,
      "loss": 0.2686,
      "step": 2570
    },
    {
      "epoch": 0.9494091580502215,
      "grad_norm": 0.36316734552383423,
      "learning_rate": 0.00013679024510407684,
      "loss": 0.3615,
      "step": 2571
    },
    {
      "epoch": 0.9497784342688331,
      "grad_norm": 0.22303825616836548,
      "learning_rate": 0.00013676561152851338,
      "loss": 0.245,
      "step": 2572
    },
    {
      "epoch": 0.9501477104874446,
      "grad_norm": 0.3745732605457306,
      "learning_rate": 0.00013674097795294987,
      "loss": 0.3863,
      "step": 2573
    },
    {
      "epoch": 0.9505169867060561,
      "grad_norm": 0.2717512547969818,
      "learning_rate": 0.00013671634437738639,
      "loss": 0.2673,
      "step": 2574
    },
    {
      "epoch": 0.9508862629246676,
      "grad_norm": 0.27752625942230225,
      "learning_rate": 0.00013669171080182287,
      "loss": 0.277,
      "step": 2575
    },
    {
      "epoch": 0.9512555391432792,
      "grad_norm": 0.2683650255203247,
      "learning_rate": 0.0001366670772262594,
      "loss": 0.3122,
      "step": 2576
    },
    {
      "epoch": 0.9516248153618907,
      "grad_norm": 0.22293895483016968,
      "learning_rate": 0.0001366424436506959,
      "loss": 0.2263,
      "step": 2577
    },
    {
      "epoch": 0.9519940915805022,
      "grad_norm": 0.3275323808193207,
      "learning_rate": 0.00013661781007513242,
      "loss": 0.3681,
      "step": 2578
    },
    {
      "epoch": 0.9523633677991138,
      "grad_norm": 0.29128924012184143,
      "learning_rate": 0.0001365931764995689,
      "loss": 0.3474,
      "step": 2579
    },
    {
      "epoch": 0.9527326440177253,
      "grad_norm": 0.2997452914714813,
      "learning_rate": 0.00013656854292400542,
      "loss": 0.2639,
      "step": 2580
    },
    {
      "epoch": 0.9531019202363368,
      "grad_norm": 0.30281564593315125,
      "learning_rate": 0.00013654390934844193,
      "loss": 0.297,
      "step": 2581
    },
    {
      "epoch": 0.9534711964549483,
      "grad_norm": 0.23481415212154388,
      "learning_rate": 0.00013651927577287845,
      "loss": 0.2492,
      "step": 2582
    },
    {
      "epoch": 0.9538404726735599,
      "grad_norm": 0.28554078936576843,
      "learning_rate": 0.00013649464219731494,
      "loss": 0.2916,
      "step": 2583
    },
    {
      "epoch": 0.9542097488921714,
      "grad_norm": 0.27036115527153015,
      "learning_rate": 0.00013647000862175145,
      "loss": 0.2819,
      "step": 2584
    },
    {
      "epoch": 0.9545790251107829,
      "grad_norm": 0.26153528690338135,
      "learning_rate": 0.00013644537504618794,
      "loss": 0.2842,
      "step": 2585
    },
    {
      "epoch": 0.9549483013293943,
      "grad_norm": 0.436210960149765,
      "learning_rate": 0.00013642074147062448,
      "loss": 0.2785,
      "step": 2586
    },
    {
      "epoch": 0.955317577548006,
      "grad_norm": 0.25508248805999756,
      "learning_rate": 0.00013639610789506097,
      "loss": 0.2806,
      "step": 2587
    },
    {
      "epoch": 0.9556868537666174,
      "grad_norm": 0.2849658131599426,
      "learning_rate": 0.00013637147431949748,
      "loss": 0.2585,
      "step": 2588
    },
    {
      "epoch": 0.9560561299852289,
      "grad_norm": 0.336910605430603,
      "learning_rate": 0.00013634684074393397,
      "loss": 0.3214,
      "step": 2589
    },
    {
      "epoch": 0.9564254062038404,
      "grad_norm": 0.29144784808158875,
      "learning_rate": 0.00013632220716837049,
      "loss": 0.2821,
      "step": 2590
    },
    {
      "epoch": 0.956794682422452,
      "grad_norm": 0.2194124311208725,
      "learning_rate": 0.000136297573592807,
      "loss": 0.2094,
      "step": 2591
    },
    {
      "epoch": 0.9571639586410635,
      "grad_norm": 0.296017050743103,
      "learning_rate": 0.00013627294001724351,
      "loss": 0.2934,
      "step": 2592
    },
    {
      "epoch": 0.957533234859675,
      "grad_norm": 0.28335466980934143,
      "learning_rate": 0.00013624830644168,
      "loss": 0.3347,
      "step": 2593
    },
    {
      "epoch": 0.9579025110782866,
      "grad_norm": 0.27571043372154236,
      "learning_rate": 0.00013622367286611652,
      "loss": 0.2478,
      "step": 2594
    },
    {
      "epoch": 0.9582717872968981,
      "grad_norm": 0.24105633795261383,
      "learning_rate": 0.00013619903929055303,
      "loss": 0.2344,
      "step": 2595
    },
    {
      "epoch": 0.9586410635155096,
      "grad_norm": 0.30373385548591614,
      "learning_rate": 0.00013617440571498955,
      "loss": 0.3435,
      "step": 2596
    },
    {
      "epoch": 0.9590103397341211,
      "grad_norm": 0.2644283175468445,
      "learning_rate": 0.00013614977213942603,
      "loss": 0.2687,
      "step": 2597
    },
    {
      "epoch": 0.9593796159527327,
      "grad_norm": 0.33212679624557495,
      "learning_rate": 0.00013612513856386255,
      "loss": 0.3084,
      "step": 2598
    },
    {
      "epoch": 0.9597488921713442,
      "grad_norm": 0.29163432121276855,
      "learning_rate": 0.00013610050498829906,
      "loss": 0.3383,
      "step": 2599
    },
    {
      "epoch": 0.9601181683899557,
      "grad_norm": 0.31650787591934204,
      "learning_rate": 0.00013607587141273558,
      "loss": 0.3769,
      "step": 2600
    },
    {
      "epoch": 0.9601181683899557,
      "eval_loss": 0.279488205909729,
      "eval_runtime": 5.8583,
      "eval_samples_per_second": 8.535,
      "eval_steps_per_second": 1.195,
      "step": 2600
    },
    {
      "epoch": 0.9604874446085672,
      "grad_norm": 0.33627936244010925,
      "learning_rate": 0.00013605123783717206,
      "loss": 0.3784,
      "step": 2601
    },
    {
      "epoch": 0.9608567208271788,
      "grad_norm": 0.3369787037372589,
      "learning_rate": 0.00013602660426160858,
      "loss": 0.3478,
      "step": 2602
    },
    {
      "epoch": 0.9612259970457903,
      "grad_norm": 0.266859233379364,
      "learning_rate": 0.00013600197068604507,
      "loss": 0.2878,
      "step": 2603
    },
    {
      "epoch": 0.9615952732644018,
      "grad_norm": 0.28313395380973816,
      "learning_rate": 0.0001359773371104816,
      "loss": 0.2815,
      "step": 2604
    },
    {
      "epoch": 0.9619645494830132,
      "grad_norm": 0.2932313084602356,
      "learning_rate": 0.0001359527035349181,
      "loss": 0.281,
      "step": 2605
    },
    {
      "epoch": 0.9623338257016248,
      "grad_norm": 0.2748316526412964,
      "learning_rate": 0.0001359280699593546,
      "loss": 0.3368,
      "step": 2606
    },
    {
      "epoch": 0.9627031019202363,
      "grad_norm": 0.24314384162425995,
      "learning_rate": 0.0001359034363837911,
      "loss": 0.2786,
      "step": 2607
    },
    {
      "epoch": 0.9630723781388478,
      "grad_norm": 0.2865593433380127,
      "learning_rate": 0.0001358788028082276,
      "loss": 0.3516,
      "step": 2608
    },
    {
      "epoch": 0.9634416543574594,
      "grad_norm": 0.275774210691452,
      "learning_rate": 0.00013585416923266413,
      "loss": 0.2796,
      "step": 2609
    },
    {
      "epoch": 0.9638109305760709,
      "grad_norm": 0.27282050251960754,
      "learning_rate": 0.00013582953565710064,
      "loss": 0.2952,
      "step": 2610
    },
    {
      "epoch": 0.9641802067946824,
      "grad_norm": 0.2969392240047455,
      "learning_rate": 0.00013580490208153713,
      "loss": 0.316,
      "step": 2611
    },
    {
      "epoch": 0.9645494830132939,
      "grad_norm": 0.2910168170928955,
      "learning_rate": 0.00013578026850597364,
      "loss": 0.2658,
      "step": 2612
    },
    {
      "epoch": 0.9649187592319055,
      "grad_norm": 0.2322939783334732,
      "learning_rate": 0.00013575563493041016,
      "loss": 0.2802,
      "step": 2613
    },
    {
      "epoch": 0.965288035450517,
      "grad_norm": 0.3182610869407654,
      "learning_rate": 0.00013573100135484667,
      "loss": 0.3148,
      "step": 2614
    },
    {
      "epoch": 0.9656573116691285,
      "grad_norm": 0.2963951528072357,
      "learning_rate": 0.00013570636777928316,
      "loss": 0.3067,
      "step": 2615
    },
    {
      "epoch": 0.96602658788774,
      "grad_norm": 0.27038127183914185,
      "learning_rate": 0.00013568173420371968,
      "loss": 0.3095,
      "step": 2616
    },
    {
      "epoch": 0.9663958641063516,
      "grad_norm": 0.23234814405441284,
      "learning_rate": 0.00013565710062815616,
      "loss": 0.3002,
      "step": 2617
    },
    {
      "epoch": 0.9667651403249631,
      "grad_norm": 0.3561446964740753,
      "learning_rate": 0.0001356324670525927,
      "loss": 0.372,
      "step": 2618
    },
    {
      "epoch": 0.9671344165435746,
      "grad_norm": 0.39320874214172363,
      "learning_rate": 0.0001356078334770292,
      "loss": 0.3569,
      "step": 2619
    },
    {
      "epoch": 0.9675036927621861,
      "grad_norm": 0.2911607325077057,
      "learning_rate": 0.0001355831999014657,
      "loss": 0.2821,
      "step": 2620
    },
    {
      "epoch": 0.9678729689807977,
      "grad_norm": 0.26914578676223755,
      "learning_rate": 0.0001355585663259022,
      "loss": 0.2815,
      "step": 2621
    },
    {
      "epoch": 0.9682422451994092,
      "grad_norm": 0.28479838371276855,
      "learning_rate": 0.0001355339327503387,
      "loss": 0.289,
      "step": 2622
    },
    {
      "epoch": 0.9686115214180206,
      "grad_norm": 0.25384166836738586,
      "learning_rate": 0.00013550929917477522,
      "loss": 0.2992,
      "step": 2623
    },
    {
      "epoch": 0.9689807976366323,
      "grad_norm": 0.2510720193386078,
      "learning_rate": 0.00013548466559921174,
      "loss": 0.3051,
      "step": 2624
    },
    {
      "epoch": 0.9693500738552437,
      "grad_norm": 0.26602286100387573,
      "learning_rate": 0.00013546003202364823,
      "loss": 0.2962,
      "step": 2625
    },
    {
      "epoch": 0.9697193500738552,
      "grad_norm": 0.2997296452522278,
      "learning_rate": 0.00013543539844808474,
      "loss": 0.3212,
      "step": 2626
    },
    {
      "epoch": 0.9700886262924667,
      "grad_norm": 0.236396923661232,
      "learning_rate": 0.00013541076487252126,
      "loss": 0.258,
      "step": 2627
    },
    {
      "epoch": 0.9704579025110783,
      "grad_norm": 0.2335156798362732,
      "learning_rate": 0.00013538613129695777,
      "loss": 0.2814,
      "step": 2628
    },
    {
      "epoch": 0.9708271787296898,
      "grad_norm": 0.27555787563323975,
      "learning_rate": 0.00013536149772139426,
      "loss": 0.2626,
      "step": 2629
    },
    {
      "epoch": 0.9711964549483013,
      "grad_norm": 0.2781428098678589,
      "learning_rate": 0.00013533686414583077,
      "loss": 0.2911,
      "step": 2630
    },
    {
      "epoch": 0.9715657311669128,
      "grad_norm": 0.3219570219516754,
      "learning_rate": 0.00013531223057026726,
      "loss": 0.3326,
      "step": 2631
    },
    {
      "epoch": 0.9719350073855244,
      "grad_norm": 0.2987314760684967,
      "learning_rate": 0.0001352875969947038,
      "loss": 0.3199,
      "step": 2632
    },
    {
      "epoch": 0.9723042836041359,
      "grad_norm": 0.34429308772087097,
      "learning_rate": 0.0001352629634191403,
      "loss": 0.3914,
      "step": 2633
    },
    {
      "epoch": 0.9726735598227474,
      "grad_norm": 0.31119295954704285,
      "learning_rate": 0.0001352383298435768,
      "loss": 0.3947,
      "step": 2634
    },
    {
      "epoch": 0.9730428360413589,
      "grad_norm": 0.3772371709346771,
      "learning_rate": 0.0001352136962680133,
      "loss": 0.351,
      "step": 2635
    },
    {
      "epoch": 0.9734121122599705,
      "grad_norm": 0.40727177262306213,
      "learning_rate": 0.0001351890626924498,
      "loss": 0.3305,
      "step": 2636
    },
    {
      "epoch": 0.973781388478582,
      "grad_norm": 0.29313406348228455,
      "learning_rate": 0.00013516442911688632,
      "loss": 0.2813,
      "step": 2637
    },
    {
      "epoch": 0.9741506646971935,
      "grad_norm": 0.25514623522758484,
      "learning_rate": 0.00013513979554132284,
      "loss": 0.27,
      "step": 2638
    },
    {
      "epoch": 0.9745199409158051,
      "grad_norm": 0.28271159529685974,
      "learning_rate": 0.00013511516196575932,
      "loss": 0.3027,
      "step": 2639
    },
    {
      "epoch": 0.9748892171344166,
      "grad_norm": 0.2886260151863098,
      "learning_rate": 0.00013509052839019584,
      "loss": 0.3226,
      "step": 2640
    },
    {
      "epoch": 0.975258493353028,
      "grad_norm": 0.25844526290893555,
      "learning_rate": 0.00013506589481463235,
      "loss": 0.3192,
      "step": 2641
    },
    {
      "epoch": 0.9756277695716395,
      "grad_norm": 0.278309166431427,
      "learning_rate": 0.00013504126123906887,
      "loss": 0.3334,
      "step": 2642
    },
    {
      "epoch": 0.9759970457902511,
      "grad_norm": 0.3024517893791199,
      "learning_rate": 0.00013501662766350535,
      "loss": 0.3601,
      "step": 2643
    },
    {
      "epoch": 0.9763663220088626,
      "grad_norm": 0.29122552275657654,
      "learning_rate": 0.00013499199408794187,
      "loss": 0.3112,
      "step": 2644
    },
    {
      "epoch": 0.9767355982274741,
      "grad_norm": 0.3468911945819855,
      "learning_rate": 0.00013496736051237838,
      "loss": 0.2789,
      "step": 2645
    },
    {
      "epoch": 0.9771048744460856,
      "grad_norm": 0.30402520298957825,
      "learning_rate": 0.0001349427269368149,
      "loss": 0.3215,
      "step": 2646
    },
    {
      "epoch": 0.9774741506646972,
      "grad_norm": 0.2312483787536621,
      "learning_rate": 0.00013491809336125139,
      "loss": 0.2599,
      "step": 2647
    },
    {
      "epoch": 0.9778434268833087,
      "grad_norm": 0.23150765895843506,
      "learning_rate": 0.0001348934597856879,
      "loss": 0.2457,
      "step": 2648
    },
    {
      "epoch": 0.9782127031019202,
      "grad_norm": 0.25944799184799194,
      "learning_rate": 0.0001348688262101244,
      "loss": 0.2969,
      "step": 2649
    },
    {
      "epoch": 0.9785819793205317,
      "grad_norm": 0.3412152826786041,
      "learning_rate": 0.00013484419263456093,
      "loss": 0.367,
      "step": 2650
    },
    {
      "epoch": 0.9785819793205317,
      "eval_loss": 0.28251588344573975,
      "eval_runtime": 5.8648,
      "eval_samples_per_second": 8.525,
      "eval_steps_per_second": 1.194,
      "step": 2650
    },
    {
      "epoch": 0.9789512555391433,
      "grad_norm": 0.29037943482398987,
      "learning_rate": 0.00013481955905899742,
      "loss": 0.2868,
      "step": 2651
    },
    {
      "epoch": 0.9793205317577548,
      "grad_norm": 0.33751970529556274,
      "learning_rate": 0.00013479492548343393,
      "loss": 0.3976,
      "step": 2652
    },
    {
      "epoch": 0.9796898079763663,
      "grad_norm": 0.2732155919075012,
      "learning_rate": 0.00013477029190787042,
      "loss": 0.299,
      "step": 2653
    },
    {
      "epoch": 0.9800590841949779,
      "grad_norm": 0.2407206892967224,
      "learning_rate": 0.00013474565833230693,
      "loss": 0.2598,
      "step": 2654
    },
    {
      "epoch": 0.9804283604135894,
      "grad_norm": 0.30155372619628906,
      "learning_rate": 0.00013472102475674345,
      "loss": 0.285,
      "step": 2655
    },
    {
      "epoch": 0.9807976366322009,
      "grad_norm": 0.24309593439102173,
      "learning_rate": 0.00013469639118117996,
      "loss": 0.2762,
      "step": 2656
    },
    {
      "epoch": 0.9811669128508124,
      "grad_norm": 0.25054389238357544,
      "learning_rate": 0.00013467175760561645,
      "loss": 0.2697,
      "step": 2657
    },
    {
      "epoch": 0.981536189069424,
      "grad_norm": 0.3400585353374481,
      "learning_rate": 0.00013464712403005297,
      "loss": 0.3121,
      "step": 2658
    },
    {
      "epoch": 0.9819054652880355,
      "grad_norm": 0.39103445410728455,
      "learning_rate": 0.00013462249045448948,
      "loss": 0.3605,
      "step": 2659
    },
    {
      "epoch": 0.982274741506647,
      "grad_norm": 0.30087360739707947,
      "learning_rate": 0.000134597856878926,
      "loss": 0.2957,
      "step": 2660
    },
    {
      "epoch": 0.9826440177252584,
      "grad_norm": 0.29284095764160156,
      "learning_rate": 0.00013457322330336248,
      "loss": 0.2918,
      "step": 2661
    },
    {
      "epoch": 0.98301329394387,
      "grad_norm": 0.2765386402606964,
      "learning_rate": 0.000134548589727799,
      "loss": 0.3137,
      "step": 2662
    },
    {
      "epoch": 0.9833825701624815,
      "grad_norm": 0.24506525695323944,
      "learning_rate": 0.00013452395615223548,
      "loss": 0.2809,
      "step": 2663
    },
    {
      "epoch": 0.983751846381093,
      "grad_norm": 0.25712794065475464,
      "learning_rate": 0.00013449932257667203,
      "loss": 0.2752,
      "step": 2664
    },
    {
      "epoch": 0.9841211225997046,
      "grad_norm": 0.3054530620574951,
      "learning_rate": 0.00013447468900110851,
      "loss": 0.2917,
      "step": 2665
    },
    {
      "epoch": 0.9844903988183161,
      "grad_norm": 0.2664269506931305,
      "learning_rate": 0.00013445005542554503,
      "loss": 0.2626,
      "step": 2666
    },
    {
      "epoch": 0.9848596750369276,
      "grad_norm": 0.29204151034355164,
      "learning_rate": 0.00013442542184998152,
      "loss": 0.3547,
      "step": 2667
    },
    {
      "epoch": 0.9852289512555391,
      "grad_norm": 0.26726964116096497,
      "learning_rate": 0.00013440078827441803,
      "loss": 0.2614,
      "step": 2668
    },
    {
      "epoch": 0.9855982274741507,
      "grad_norm": 0.3160478472709656,
      "learning_rate": 0.00013437615469885455,
      "loss": 0.2919,
      "step": 2669
    },
    {
      "epoch": 0.9859675036927622,
      "grad_norm": 0.2872214913368225,
      "learning_rate": 0.00013435152112329106,
      "loss": 0.321,
      "step": 2670
    },
    {
      "epoch": 0.9863367799113737,
      "grad_norm": 0.2202260047197342,
      "learning_rate": 0.00013432688754772755,
      "loss": 0.2732,
      "step": 2671
    },
    {
      "epoch": 0.9867060561299852,
      "grad_norm": 0.30726248025894165,
      "learning_rate": 0.00013430225397216406,
      "loss": 0.3071,
      "step": 2672
    },
    {
      "epoch": 0.9870753323485968,
      "grad_norm": 0.2842807471752167,
      "learning_rate": 0.00013427762039660058,
      "loss": 0.3329,
      "step": 2673
    },
    {
      "epoch": 0.9874446085672083,
      "grad_norm": 0.29567378759384155,
      "learning_rate": 0.0001342529868210371,
      "loss": 0.2524,
      "step": 2674
    },
    {
      "epoch": 0.9878138847858198,
      "grad_norm": 0.29411858320236206,
      "learning_rate": 0.00013422835324547358,
      "loss": 0.2778,
      "step": 2675
    },
    {
      "epoch": 0.9881831610044313,
      "grad_norm": 0.24854087829589844,
      "learning_rate": 0.0001342037196699101,
      "loss": 0.2744,
      "step": 2676
    },
    {
      "epoch": 0.9885524372230429,
      "grad_norm": 0.2956259846687317,
      "learning_rate": 0.0001341790860943466,
      "loss": 0.272,
      "step": 2677
    },
    {
      "epoch": 0.9889217134416544,
      "grad_norm": 0.2957102060317993,
      "learning_rate": 0.00013415445251878312,
      "loss": 0.3295,
      "step": 2678
    },
    {
      "epoch": 0.9892909896602659,
      "grad_norm": 0.31470787525177,
      "learning_rate": 0.0001341298189432196,
      "loss": 0.3134,
      "step": 2679
    },
    {
      "epoch": 0.9896602658788775,
      "grad_norm": 0.31523028016090393,
      "learning_rate": 0.00013410518536765612,
      "loss": 0.3438,
      "step": 2680
    },
    {
      "epoch": 0.990029542097489,
      "grad_norm": 0.2612340748310089,
      "learning_rate": 0.0001340805517920926,
      "loss": 0.2339,
      "step": 2681
    },
    {
      "epoch": 0.9903988183161004,
      "grad_norm": 0.26308852434158325,
      "learning_rate": 0.00013405591821652915,
      "loss": 0.2653,
      "step": 2682
    },
    {
      "epoch": 0.9907680945347119,
      "grad_norm": 0.2718425691127777,
      "learning_rate": 0.00013403128464096564,
      "loss": 0.3107,
      "step": 2683
    },
    {
      "epoch": 0.9911373707533235,
      "grad_norm": 0.33294862508773804,
      "learning_rate": 0.00013400665106540216,
      "loss": 0.324,
      "step": 2684
    },
    {
      "epoch": 0.991506646971935,
      "grad_norm": 0.3325161635875702,
      "learning_rate": 0.00013398201748983864,
      "loss": 0.3826,
      "step": 2685
    },
    {
      "epoch": 0.9918759231905465,
      "grad_norm": 0.2503698766231537,
      "learning_rate": 0.00013395738391427516,
      "loss": 0.3366,
      "step": 2686
    },
    {
      "epoch": 0.992245199409158,
      "grad_norm": 0.27409908175468445,
      "learning_rate": 0.00013393275033871167,
      "loss": 0.2913,
      "step": 2687
    },
    {
      "epoch": 0.9926144756277696,
      "grad_norm": 0.3058303892612457,
      "learning_rate": 0.0001339081167631482,
      "loss": 0.3015,
      "step": 2688
    },
    {
      "epoch": 0.9929837518463811,
      "grad_norm": 0.2649807333946228,
      "learning_rate": 0.00013388348318758468,
      "loss": 0.3243,
      "step": 2689
    },
    {
      "epoch": 0.9933530280649926,
      "grad_norm": 0.249671071767807,
      "learning_rate": 0.0001338588496120212,
      "loss": 0.291,
      "step": 2690
    },
    {
      "epoch": 0.9937223042836041,
      "grad_norm": 0.2473740130662918,
      "learning_rate": 0.0001338342160364577,
      "loss": 0.3107,
      "step": 2691
    },
    {
      "epoch": 0.9940915805022157,
      "grad_norm": 0.2776089310646057,
      "learning_rate": 0.00013380958246089422,
      "loss": 0.3164,
      "step": 2692
    },
    {
      "epoch": 0.9944608567208272,
      "grad_norm": 0.2375720590353012,
      "learning_rate": 0.0001337849488853307,
      "loss": 0.241,
      "step": 2693
    },
    {
      "epoch": 0.9948301329394387,
      "grad_norm": 0.24300681054592133,
      "learning_rate": 0.00013376031530976722,
      "loss": 0.2543,
      "step": 2694
    },
    {
      "epoch": 0.9951994091580503,
      "grad_norm": 0.2924548387527466,
      "learning_rate": 0.0001337356817342037,
      "loss": 0.3668,
      "step": 2695
    },
    {
      "epoch": 0.9955686853766618,
      "grad_norm": 0.24749398231506348,
      "learning_rate": 0.00013371104815864025,
      "loss": 0.2591,
      "step": 2696
    },
    {
      "epoch": 0.9959379615952733,
      "grad_norm": 0.24914845824241638,
      "learning_rate": 0.00013368641458307674,
      "loss": 0.2659,
      "step": 2697
    },
    {
      "epoch": 0.9963072378138847,
      "grad_norm": 0.27471911907196045,
      "learning_rate": 0.00013366178100751325,
      "loss": 0.2832,
      "step": 2698
    },
    {
      "epoch": 0.9966765140324964,
      "grad_norm": 0.27133798599243164,
      "learning_rate": 0.00013363714743194974,
      "loss": 0.2787,
      "step": 2699
    },
    {
      "epoch": 0.9970457902511078,
      "grad_norm": 0.24822653830051422,
      "learning_rate": 0.00013361251385638626,
      "loss": 0.2716,
      "step": 2700
    },
    {
      "epoch": 0.9970457902511078,
      "eval_loss": 0.274232417345047,
      "eval_runtime": 5.8623,
      "eval_samples_per_second": 8.529,
      "eval_steps_per_second": 1.194,
      "step": 2700
    },
    {
      "epoch": 0.9974150664697193,
      "grad_norm": 0.261340856552124,
      "learning_rate": 0.00013358788028082277,
      "loss": 0.2797,
      "step": 2701
    },
    {
      "epoch": 0.9977843426883308,
      "grad_norm": 0.2826089859008789,
      "learning_rate": 0.00013356324670525928,
      "loss": 0.3014,
      "step": 2702
    },
    {
      "epoch": 0.9981536189069424,
      "grad_norm": 0.3125585913658142,
      "learning_rate": 0.00013353861312969577,
      "loss": 0.3643,
      "step": 2703
    },
    {
      "epoch": 0.9985228951255539,
      "grad_norm": 0.3222525417804718,
      "learning_rate": 0.0001335139795541323,
      "loss": 0.3355,
      "step": 2704
    },
    {
      "epoch": 0.9988921713441654,
      "grad_norm": 0.2628626525402069,
      "learning_rate": 0.0001334893459785688,
      "loss": 0.2584,
      "step": 2705
    },
    {
      "epoch": 0.9992614475627769,
      "grad_norm": 0.28646916151046753,
      "learning_rate": 0.00013346471240300532,
      "loss": 0.288,
      "step": 2706
    },
    {
      "epoch": 0.9996307237813885,
      "grad_norm": 0.27381405234336853,
      "learning_rate": 0.0001334400788274418,
      "loss": 0.2406,
      "step": 2707
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.41870248317718506,
      "learning_rate": 0.00013341544525187832,
      "loss": 0.3323,
      "step": 2708
    },
    {
      "epoch": 1.0003692762186116,
      "grad_norm": 0.2796136438846588,
      "learning_rate": 0.00013339081167631483,
      "loss": 0.2912,
      "step": 2709
    },
    {
      "epoch": 1.000738552437223,
      "grad_norm": 0.23714330792427063,
      "learning_rate": 0.00013336617810075135,
      "loss": 0.2289,
      "step": 2710
    },
    {
      "epoch": 1.0011078286558346,
      "grad_norm": 0.24866294860839844,
      "learning_rate": 0.00013334154452518783,
      "loss": 0.2356,
      "step": 2711
    },
    {
      "epoch": 1.0014771048744462,
      "grad_norm": 0.22556094825267792,
      "learning_rate": 0.00013331691094962435,
      "loss": 0.2262,
      "step": 2712
    },
    {
      "epoch": 1.0018463810930576,
      "grad_norm": 0.25270405411720276,
      "learning_rate": 0.00013329227737406084,
      "loss": 0.2228,
      "step": 2713
    },
    {
      "epoch": 1.0022156573116692,
      "grad_norm": 0.2538648247718811,
      "learning_rate": 0.00013326764379849738,
      "loss": 0.2222,
      "step": 2714
    },
    {
      "epoch": 1.0025849335302806,
      "grad_norm": 0.252612441778183,
      "learning_rate": 0.00013324301022293387,
      "loss": 0.2648,
      "step": 2715
    },
    {
      "epoch": 1.0029542097488922,
      "grad_norm": 0.1996837705373764,
      "learning_rate": 0.00013321837664737038,
      "loss": 0.2038,
      "step": 2716
    },
    {
      "epoch": 1.0033234859675038,
      "grad_norm": 0.2726893424987793,
      "learning_rate": 0.00013319374307180687,
      "loss": 0.2449,
      "step": 2717
    },
    {
      "epoch": 1.0036927621861151,
      "grad_norm": 0.2307356745004654,
      "learning_rate": 0.00013316910949624338,
      "loss": 0.2301,
      "step": 2718
    },
    {
      "epoch": 1.0040620384047267,
      "grad_norm": 0.3182328939437866,
      "learning_rate": 0.0001331444759206799,
      "loss": 0.252,
      "step": 2719
    },
    {
      "epoch": 1.0044313146233383,
      "grad_norm": 0.2076626867055893,
      "learning_rate": 0.0001331198423451164,
      "loss": 0.2127,
      "step": 2720
    },
    {
      "epoch": 1.0048005908419497,
      "grad_norm": 0.289315402507782,
      "learning_rate": 0.0001330952087695529,
      "loss": 0.2599,
      "step": 2721
    },
    {
      "epoch": 1.0051698670605613,
      "grad_norm": 0.24626076221466064,
      "learning_rate": 0.00013307057519398941,
      "loss": 0.2289,
      "step": 2722
    },
    {
      "epoch": 1.005539143279173,
      "grad_norm": 0.25126105546951294,
      "learning_rate": 0.00013304594161842593,
      "loss": 0.1995,
      "step": 2723
    },
    {
      "epoch": 1.0059084194977843,
      "grad_norm": 0.3186296224594116,
      "learning_rate": 0.00013302130804286244,
      "loss": 0.2562,
      "step": 2724
    },
    {
      "epoch": 1.006277695716396,
      "grad_norm": 0.26250606775283813,
      "learning_rate": 0.00013299667446729893,
      "loss": 0.2666,
      "step": 2725
    },
    {
      "epoch": 1.0066469719350073,
      "grad_norm": 0.4268067181110382,
      "learning_rate": 0.00013297204089173545,
      "loss": 0.1914,
      "step": 2726
    },
    {
      "epoch": 1.007016248153619,
      "grad_norm": 0.22976088523864746,
      "learning_rate": 0.00013294740731617193,
      "loss": 0.1905,
      "step": 2727
    },
    {
      "epoch": 1.0073855243722305,
      "grad_norm": 0.26273614168167114,
      "learning_rate": 0.00013292277374060848,
      "loss": 0.254,
      "step": 2728
    },
    {
      "epoch": 1.0077548005908419,
      "grad_norm": 0.34758633375167847,
      "learning_rate": 0.00013289814016504496,
      "loss": 0.2775,
      "step": 2729
    },
    {
      "epoch": 1.0081240768094535,
      "grad_norm": 0.2611452639102936,
      "learning_rate": 0.00013287350658948148,
      "loss": 0.256,
      "step": 2730
    },
    {
      "epoch": 1.008493353028065,
      "grad_norm": 0.3135962188243866,
      "learning_rate": 0.00013284887301391797,
      "loss": 0.3241,
      "step": 2731
    },
    {
      "epoch": 1.0088626292466765,
      "grad_norm": 0.26270318031311035,
      "learning_rate": 0.00013282423943835448,
      "loss": 0.2741,
      "step": 2732
    },
    {
      "epoch": 1.009231905465288,
      "grad_norm": 0.22408580780029297,
      "learning_rate": 0.000132799605862791,
      "loss": 0.24,
      "step": 2733
    },
    {
      "epoch": 1.0096011816838995,
      "grad_norm": 0.30195099115371704,
      "learning_rate": 0.0001327749722872275,
      "loss": 0.2597,
      "step": 2734
    },
    {
      "epoch": 1.009970457902511,
      "grad_norm": 0.2831840515136719,
      "learning_rate": 0.000132750338711664,
      "loss": 0.269,
      "step": 2735
    },
    {
      "epoch": 1.0103397341211227,
      "grad_norm": 0.2893206477165222,
      "learning_rate": 0.0001327257051361005,
      "loss": 0.2366,
      "step": 2736
    },
    {
      "epoch": 1.010709010339734,
      "grad_norm": 0.2479332834482193,
      "learning_rate": 0.00013270107156053703,
      "loss": 0.2529,
      "step": 2737
    },
    {
      "epoch": 1.0110782865583456,
      "grad_norm": 0.26697227358818054,
      "learning_rate": 0.00013267643798497354,
      "loss": 0.265,
      "step": 2738
    },
    {
      "epoch": 1.0114475627769572,
      "grad_norm": 0.2914988696575165,
      "learning_rate": 0.00013265180440941003,
      "loss": 0.2525,
      "step": 2739
    },
    {
      "epoch": 1.0118168389955686,
      "grad_norm": 0.2896738350391388,
      "learning_rate": 0.00013262717083384654,
      "loss": 0.2927,
      "step": 2740
    },
    {
      "epoch": 1.0121861152141802,
      "grad_norm": 0.2432498037815094,
      "learning_rate": 0.00013260253725828306,
      "loss": 0.2197,
      "step": 2741
    },
    {
      "epoch": 1.0125553914327918,
      "grad_norm": 0.2340407818555832,
      "learning_rate": 0.00013257790368271957,
      "loss": 0.2821,
      "step": 2742
    },
    {
      "epoch": 1.0129246676514032,
      "grad_norm": 0.3191532492637634,
      "learning_rate": 0.00013255327010715606,
      "loss": 0.2787,
      "step": 2743
    },
    {
      "epoch": 1.0132939438700148,
      "grad_norm": 0.2743033170700073,
      "learning_rate": 0.00013252863653159257,
      "loss": 0.2802,
      "step": 2744
    },
    {
      "epoch": 1.0136632200886262,
      "grad_norm": 0.30244073271751404,
      "learning_rate": 0.00013250400295602906,
      "loss": 0.2712,
      "step": 2745
    },
    {
      "epoch": 1.0140324963072378,
      "grad_norm": 0.2862381935119629,
      "learning_rate": 0.0001324793693804656,
      "loss": 0.2567,
      "step": 2746
    },
    {
      "epoch": 1.0144017725258494,
      "grad_norm": 0.21522857248783112,
      "learning_rate": 0.0001324547358049021,
      "loss": 0.2134,
      "step": 2747
    },
    {
      "epoch": 1.0147710487444608,
      "grad_norm": 0.23117266595363617,
      "learning_rate": 0.0001324301022293386,
      "loss": 0.2232,
      "step": 2748
    },
    {
      "epoch": 1.0151403249630724,
      "grad_norm": 0.2996717095375061,
      "learning_rate": 0.0001324054686537751,
      "loss": 0.2629,
      "step": 2749
    },
    {
      "epoch": 1.015509601181684,
      "grad_norm": 0.2513776421546936,
      "learning_rate": 0.0001323808350782116,
      "loss": 0.2296,
      "step": 2750
    },
    {
      "epoch": 1.015509601181684,
      "eval_loss": 0.2765809893608093,
      "eval_runtime": 5.8669,
      "eval_samples_per_second": 8.522,
      "eval_steps_per_second": 1.193,
      "step": 2750
    },
    {
      "epoch": 1.0158788774002954,
      "grad_norm": 0.22069954872131348,
      "learning_rate": 0.00013235620150264812,
      "loss": 0.1827,
      "step": 2751
    },
    {
      "epoch": 1.016248153618907,
      "grad_norm": 0.23865504562854767,
      "learning_rate": 0.00013233156792708464,
      "loss": 0.2429,
      "step": 2752
    },
    {
      "epoch": 1.0166174298375186,
      "grad_norm": 0.3293781578540802,
      "learning_rate": 0.00013230693435152112,
      "loss": 0.297,
      "step": 2753
    },
    {
      "epoch": 1.01698670605613,
      "grad_norm": 0.26011794805526733,
      "learning_rate": 0.00013228230077595764,
      "loss": 0.2156,
      "step": 2754
    },
    {
      "epoch": 1.0173559822747416,
      "grad_norm": 0.28752657771110535,
      "learning_rate": 0.00013225766720039415,
      "loss": 0.2522,
      "step": 2755
    },
    {
      "epoch": 1.017725258493353,
      "grad_norm": 0.2574158310890198,
      "learning_rate": 0.00013223303362483067,
      "loss": 0.2289,
      "step": 2756
    },
    {
      "epoch": 1.0180945347119645,
      "grad_norm": 0.2493678331375122,
      "learning_rate": 0.00013220840004926716,
      "loss": 0.22,
      "step": 2757
    },
    {
      "epoch": 1.0184638109305761,
      "grad_norm": 0.24139755964279175,
      "learning_rate": 0.00013218376647370367,
      "loss": 0.2285,
      "step": 2758
    },
    {
      "epoch": 1.0188330871491875,
      "grad_norm": 0.30356085300445557,
      "learning_rate": 0.00013215913289814016,
      "loss": 0.2515,
      "step": 2759
    },
    {
      "epoch": 1.0192023633677991,
      "grad_norm": 0.31113043427467346,
      "learning_rate": 0.0001321344993225767,
      "loss": 0.2994,
      "step": 2760
    },
    {
      "epoch": 1.0195716395864107,
      "grad_norm": 0.26762205362319946,
      "learning_rate": 0.0001321098657470132,
      "loss": 0.2276,
      "step": 2761
    },
    {
      "epoch": 1.019940915805022,
      "grad_norm": 0.24827049672603607,
      "learning_rate": 0.0001320852321714497,
      "loss": 0.2461,
      "step": 2762
    },
    {
      "epoch": 1.0203101920236337,
      "grad_norm": 0.23613737523555756,
      "learning_rate": 0.0001320605985958862,
      "loss": 0.2419,
      "step": 2763
    },
    {
      "epoch": 1.020679468242245,
      "grad_norm": 0.3247947692871094,
      "learning_rate": 0.0001320359650203227,
      "loss": 0.3048,
      "step": 2764
    },
    {
      "epoch": 1.0210487444608567,
      "grad_norm": 0.2768281102180481,
      "learning_rate": 0.00013201133144475922,
      "loss": 0.2571,
      "step": 2765
    },
    {
      "epoch": 1.0214180206794683,
      "grad_norm": 0.24561360478401184,
      "learning_rate": 0.00013198669786919573,
      "loss": 0.2267,
      "step": 2766
    },
    {
      "epoch": 1.0217872968980797,
      "grad_norm": 0.29054397344589233,
      "learning_rate": 0.00013196206429363222,
      "loss": 0.268,
      "step": 2767
    },
    {
      "epoch": 1.0221565731166913,
      "grad_norm": 0.28214576840400696,
      "learning_rate": 0.00013193743071806874,
      "loss": 0.2309,
      "step": 2768
    },
    {
      "epoch": 1.0225258493353029,
      "grad_norm": 0.28700947761535645,
      "learning_rate": 0.00013191279714250525,
      "loss": 0.2764,
      "step": 2769
    },
    {
      "epoch": 1.0228951255539143,
      "grad_norm": 0.3498358726501465,
      "learning_rate": 0.00013188816356694176,
      "loss": 0.3117,
      "step": 2770
    },
    {
      "epoch": 1.0232644017725259,
      "grad_norm": 0.2784065306186676,
      "learning_rate": 0.00013186352999137825,
      "loss": 0.2392,
      "step": 2771
    },
    {
      "epoch": 1.0236336779911375,
      "grad_norm": 0.2639756202697754,
      "learning_rate": 0.00013183889641581477,
      "loss": 0.2034,
      "step": 2772
    },
    {
      "epoch": 1.0240029542097489,
      "grad_norm": 0.25432780385017395,
      "learning_rate": 0.00013181426284025125,
      "loss": 0.257,
      "step": 2773
    },
    {
      "epoch": 1.0243722304283605,
      "grad_norm": 0.24631430208683014,
      "learning_rate": 0.0001317896292646878,
      "loss": 0.26,
      "step": 2774
    },
    {
      "epoch": 1.0247415066469718,
      "grad_norm": 0.23217861354351044,
      "learning_rate": 0.00013176499568912428,
      "loss": 0.209,
      "step": 2775
    },
    {
      "epoch": 1.0251107828655834,
      "grad_norm": 0.24988959729671478,
      "learning_rate": 0.0001317403621135608,
      "loss": 0.2473,
      "step": 2776
    },
    {
      "epoch": 1.025480059084195,
      "grad_norm": 0.2612614035606384,
      "learning_rate": 0.00013171572853799729,
      "loss": 0.231,
      "step": 2777
    },
    {
      "epoch": 1.0258493353028064,
      "grad_norm": 0.23968425393104553,
      "learning_rate": 0.0001316910949624338,
      "loss": 0.2407,
      "step": 2778
    },
    {
      "epoch": 1.026218611521418,
      "grad_norm": 0.22235450148582458,
      "learning_rate": 0.00013166646138687032,
      "loss": 0.1932,
      "step": 2779
    },
    {
      "epoch": 1.0265878877400296,
      "grad_norm": 0.24761974811553955,
      "learning_rate": 0.00013164182781130683,
      "loss": 0.2374,
      "step": 2780
    },
    {
      "epoch": 1.026957163958641,
      "grad_norm": 0.3479146659374237,
      "learning_rate": 0.00013161719423574332,
      "loss": 0.3226,
      "step": 2781
    },
    {
      "epoch": 1.0273264401772526,
      "grad_norm": 0.31968605518341064,
      "learning_rate": 0.00013159256066017983,
      "loss": 0.2611,
      "step": 2782
    },
    {
      "epoch": 1.0276957163958642,
      "grad_norm": 0.3102746605873108,
      "learning_rate": 0.00013156792708461635,
      "loss": 0.2464,
      "step": 2783
    },
    {
      "epoch": 1.0280649926144756,
      "grad_norm": 0.22494328022003174,
      "learning_rate": 0.00013154329350905286,
      "loss": 0.2197,
      "step": 2784
    },
    {
      "epoch": 1.0284342688330872,
      "grad_norm": 0.2669663429260254,
      "learning_rate": 0.00013151865993348935,
      "loss": 0.2484,
      "step": 2785
    },
    {
      "epoch": 1.0288035450516986,
      "grad_norm": 0.22760596871376038,
      "learning_rate": 0.00013149402635792586,
      "loss": 0.2172,
      "step": 2786
    },
    {
      "epoch": 1.0291728212703102,
      "grad_norm": 0.2485930472612381,
      "learning_rate": 0.00013146939278236238,
      "loss": 0.2698,
      "step": 2787
    },
    {
      "epoch": 1.0295420974889218,
      "grad_norm": 0.2849338948726654,
      "learning_rate": 0.0001314447592067989,
      "loss": 0.2545,
      "step": 2788
    },
    {
      "epoch": 1.0299113737075332,
      "grad_norm": 0.26047658920288086,
      "learning_rate": 0.00013142012563123538,
      "loss": 0.2296,
      "step": 2789
    },
    {
      "epoch": 1.0302806499261448,
      "grad_norm": 0.2526266276836395,
      "learning_rate": 0.00013139549205567187,
      "loss": 0.2553,
      "step": 2790
    },
    {
      "epoch": 1.0306499261447564,
      "grad_norm": 0.2689168155193329,
      "learning_rate": 0.00013137085848010838,
      "loss": 0.2323,
      "step": 2791
    },
    {
      "epoch": 1.0310192023633677,
      "grad_norm": 0.28571009635925293,
      "learning_rate": 0.0001313462249045449,
      "loss": 0.2889,
      "step": 2792
    },
    {
      "epoch": 1.0313884785819794,
      "grad_norm": 0.2267007678747177,
      "learning_rate": 0.0001313215913289814,
      "loss": 0.2128,
      "step": 2793
    },
    {
      "epoch": 1.0317577548005907,
      "grad_norm": 0.23780003190040588,
      "learning_rate": 0.0001312969577534179,
      "loss": 0.1987,
      "step": 2794
    },
    {
      "epoch": 1.0321270310192023,
      "grad_norm": 0.2646009027957916,
      "learning_rate": 0.00013127232417785441,
      "loss": 0.2778,
      "step": 2795
    },
    {
      "epoch": 1.032496307237814,
      "grad_norm": 0.2047690451145172,
      "learning_rate": 0.00013124769060229093,
      "loss": 0.2001,
      "step": 2796
    },
    {
      "epoch": 1.0328655834564253,
      "grad_norm": 0.24524430930614471,
      "learning_rate": 0.00013122305702672744,
      "loss": 0.2674,
      "step": 2797
    },
    {
      "epoch": 1.033234859675037,
      "grad_norm": 0.2460377961397171,
      "learning_rate": 0.00013119842345116393,
      "loss": 0.2551,
      "step": 2798
    },
    {
      "epoch": 1.0336041358936485,
      "grad_norm": 0.2284260094165802,
      "learning_rate": 0.00013117378987560045,
      "loss": 0.1842,
      "step": 2799
    },
    {
      "epoch": 1.03397341211226,
      "grad_norm": 0.30626076459884644,
      "learning_rate": 0.00013114915630003693,
      "loss": 0.2457,
      "step": 2800
    },
    {
      "epoch": 1.03397341211226,
      "eval_loss": 0.27627602219581604,
      "eval_runtime": 5.8633,
      "eval_samples_per_second": 8.528,
      "eval_steps_per_second": 1.194,
      "step": 2800
    },
    {
      "epoch": 1.0343426883308715,
      "grad_norm": 0.27121275663375854,
      "learning_rate": 0.00013112452272447347,
      "loss": 0.2557,
      "step": 2801
    },
    {
      "epoch": 1.034711964549483,
      "grad_norm": 0.23903623223304749,
      "learning_rate": 0.00013109988914890996,
      "loss": 0.2436,
      "step": 2802
    },
    {
      "epoch": 1.0350812407680945,
      "grad_norm": 0.26895764470100403,
      "learning_rate": 0.00013107525557334648,
      "loss": 0.253,
      "step": 2803
    },
    {
      "epoch": 1.035450516986706,
      "grad_norm": 0.3080110251903534,
      "learning_rate": 0.00013105062199778296,
      "loss": 0.2786,
      "step": 2804
    },
    {
      "epoch": 1.0358197932053175,
      "grad_norm": 0.23358094692230225,
      "learning_rate": 0.00013102598842221948,
      "loss": 0.2456,
      "step": 2805
    },
    {
      "epoch": 1.036189069423929,
      "grad_norm": 0.22798776626586914,
      "learning_rate": 0.000131001354846656,
      "loss": 0.2256,
      "step": 2806
    },
    {
      "epoch": 1.0365583456425407,
      "grad_norm": 0.24336901307106018,
      "learning_rate": 0.0001309767212710925,
      "loss": 0.2367,
      "step": 2807
    },
    {
      "epoch": 1.036927621861152,
      "grad_norm": 0.2236175686120987,
      "learning_rate": 0.000130952087695529,
      "loss": 0.2216,
      "step": 2808
    },
    {
      "epoch": 1.0372968980797637,
      "grad_norm": 0.25561004877090454,
      "learning_rate": 0.0001309274541199655,
      "loss": 0.263,
      "step": 2809
    },
    {
      "epoch": 1.0376661742983753,
      "grad_norm": 0.3103543519973755,
      "learning_rate": 0.00013090282054440203,
      "loss": 0.2854,
      "step": 2810
    },
    {
      "epoch": 1.0380354505169866,
      "grad_norm": 0.266419917345047,
      "learning_rate": 0.00013087818696883854,
      "loss": 0.2509,
      "step": 2811
    },
    {
      "epoch": 1.0384047267355982,
      "grad_norm": 0.29172733426094055,
      "learning_rate": 0.00013085355339327503,
      "loss": 0.2717,
      "step": 2812
    },
    {
      "epoch": 1.0387740029542099,
      "grad_norm": 0.2763681411743164,
      "learning_rate": 0.00013082891981771154,
      "loss": 0.3048,
      "step": 2813
    },
    {
      "epoch": 1.0391432791728212,
      "grad_norm": 0.2837805449962616,
      "learning_rate": 0.00013080428624214806,
      "loss": 0.2704,
      "step": 2814
    },
    {
      "epoch": 1.0395125553914328,
      "grad_norm": 0.2764654755592346,
      "learning_rate": 0.00013077965266658457,
      "loss": 0.2343,
      "step": 2815
    },
    {
      "epoch": 1.0398818316100442,
      "grad_norm": 0.3275948166847229,
      "learning_rate": 0.00013075501909102106,
      "loss": 0.283,
      "step": 2816
    },
    {
      "epoch": 1.0402511078286558,
      "grad_norm": 0.27809953689575195,
      "learning_rate": 0.00013073038551545757,
      "loss": 0.2474,
      "step": 2817
    },
    {
      "epoch": 1.0406203840472674,
      "grad_norm": 0.2790237367153168,
      "learning_rate": 0.00013070575193989406,
      "loss": 0.2589,
      "step": 2818
    },
    {
      "epoch": 1.0409896602658788,
      "grad_norm": 0.2769007980823517,
      "learning_rate": 0.0001306811183643306,
      "loss": 0.2463,
      "step": 2819
    },
    {
      "epoch": 1.0413589364844904,
      "grad_norm": 0.38644951581954956,
      "learning_rate": 0.0001306564847887671,
      "loss": 0.2702,
      "step": 2820
    },
    {
      "epoch": 1.041728212703102,
      "grad_norm": 0.2395869642496109,
      "learning_rate": 0.0001306318512132036,
      "loss": 0.2409,
      "step": 2821
    },
    {
      "epoch": 1.0420974889217134,
      "grad_norm": 0.35608041286468506,
      "learning_rate": 0.0001306072176376401,
      "loss": 0.2944,
      "step": 2822
    },
    {
      "epoch": 1.042466765140325,
      "grad_norm": 0.3523600399494171,
      "learning_rate": 0.0001305825840620766,
      "loss": 0.2428,
      "step": 2823
    },
    {
      "epoch": 1.0428360413589366,
      "grad_norm": 0.26375600695610046,
      "learning_rate": 0.00013055795048651312,
      "loss": 0.2745,
      "step": 2824
    },
    {
      "epoch": 1.043205317577548,
      "grad_norm": 0.30907225608825684,
      "learning_rate": 0.00013053331691094964,
      "loss": 0.2518,
      "step": 2825
    },
    {
      "epoch": 1.0435745937961596,
      "grad_norm": 0.2785424292087555,
      "learning_rate": 0.00013050868333538612,
      "loss": 0.237,
      "step": 2826
    },
    {
      "epoch": 1.043943870014771,
      "grad_norm": 0.26143819093704224,
      "learning_rate": 0.00013048404975982264,
      "loss": 0.2381,
      "step": 2827
    },
    {
      "epoch": 1.0443131462333826,
      "grad_norm": 0.2775009274482727,
      "learning_rate": 0.00013045941618425915,
      "loss": 0.2384,
      "step": 2828
    },
    {
      "epoch": 1.0446824224519942,
      "grad_norm": 0.3305076062679291,
      "learning_rate": 0.00013043478260869567,
      "loss": 0.3022,
      "step": 2829
    },
    {
      "epoch": 1.0450516986706055,
      "grad_norm": 0.34179162979125977,
      "learning_rate": 0.00013041014903313216,
      "loss": 0.2568,
      "step": 2830
    },
    {
      "epoch": 1.0454209748892171,
      "grad_norm": 0.2659331262111664,
      "learning_rate": 0.00013038551545756867,
      "loss": 0.2243,
      "step": 2831
    },
    {
      "epoch": 1.0457902511078287,
      "grad_norm": 0.33292558789253235,
      "learning_rate": 0.00013036088188200516,
      "loss": 0.232,
      "step": 2832
    },
    {
      "epoch": 1.0461595273264401,
      "grad_norm": 0.28080078959465027,
      "learning_rate": 0.0001303362483064417,
      "loss": 0.2678,
      "step": 2833
    },
    {
      "epoch": 1.0465288035450517,
      "grad_norm": 0.27119290828704834,
      "learning_rate": 0.0001303116147308782,
      "loss": 0.2687,
      "step": 2834
    },
    {
      "epoch": 1.0468980797636631,
      "grad_norm": 0.2821057140827179,
      "learning_rate": 0.0001302869811553147,
      "loss": 0.2614,
      "step": 2835
    },
    {
      "epoch": 1.0472673559822747,
      "grad_norm": 0.30902543663978577,
      "learning_rate": 0.0001302623475797512,
      "loss": 0.227,
      "step": 2836
    },
    {
      "epoch": 1.0476366322008863,
      "grad_norm": 0.23212537169456482,
      "learning_rate": 0.0001302377140041877,
      "loss": 0.1726,
      "step": 2837
    },
    {
      "epoch": 1.0480059084194977,
      "grad_norm": 0.25574445724487305,
      "learning_rate": 0.00013021308042862422,
      "loss": 0.2019,
      "step": 2838
    },
    {
      "epoch": 1.0483751846381093,
      "grad_norm": 0.32025423645973206,
      "learning_rate": 0.00013018844685306073,
      "loss": 0.2587,
      "step": 2839
    },
    {
      "epoch": 1.048744460856721,
      "grad_norm": 0.23577898740768433,
      "learning_rate": 0.00013016381327749722,
      "loss": 0.2392,
      "step": 2840
    },
    {
      "epoch": 1.0491137370753323,
      "grad_norm": 0.23110058903694153,
      "learning_rate": 0.00013013917970193374,
      "loss": 0.2344,
      "step": 2841
    },
    {
      "epoch": 1.049483013293944,
      "grad_norm": 0.3195451498031616,
      "learning_rate": 0.00013011454612637025,
      "loss": 0.2291,
      "step": 2842
    },
    {
      "epoch": 1.0498522895125555,
      "grad_norm": 0.23478031158447266,
      "learning_rate": 0.00013008991255080676,
      "loss": 0.256,
      "step": 2843
    },
    {
      "epoch": 1.0502215657311669,
      "grad_norm": 0.2877449691295624,
      "learning_rate": 0.00013006527897524325,
      "loss": 0.2616,
      "step": 2844
    },
    {
      "epoch": 1.0505908419497785,
      "grad_norm": 0.28867214918136597,
      "learning_rate": 0.00013004064539967977,
      "loss": 0.2836,
      "step": 2845
    },
    {
      "epoch": 1.0509601181683899,
      "grad_norm": 0.299875408411026,
      "learning_rate": 0.00013001601182411628,
      "loss": 0.2304,
      "step": 2846
    },
    {
      "epoch": 1.0513293943870015,
      "grad_norm": 0.24449953436851501,
      "learning_rate": 0.0001299913782485528,
      "loss": 0.1973,
      "step": 2847
    },
    {
      "epoch": 1.051698670605613,
      "grad_norm": 0.2585119605064392,
      "learning_rate": 0.00012996674467298928,
      "loss": 0.187,
      "step": 2848
    },
    {
      "epoch": 1.0520679468242244,
      "grad_norm": 0.24200786650180817,
      "learning_rate": 0.0001299421110974258,
      "loss": 0.2077,
      "step": 2849
    },
    {
      "epoch": 1.052437223042836,
      "grad_norm": 0.31775715947151184,
      "learning_rate": 0.00012991747752186229,
      "loss": 0.2987,
      "step": 2850
    },
    {
      "epoch": 1.052437223042836,
      "eval_loss": 0.27618589997291565,
      "eval_runtime": 5.8566,
      "eval_samples_per_second": 8.537,
      "eval_steps_per_second": 1.195,
      "step": 2850
    },
    {
      "epoch": 1.0528064992614476,
      "grad_norm": 0.3076942265033722,
      "learning_rate": 0.00012989284394629883,
      "loss": 0.2668,
      "step": 2851
    },
    {
      "epoch": 1.053175775480059,
      "grad_norm": 0.2545338273048401,
      "learning_rate": 0.00012986821037073531,
      "loss": 0.2562,
      "step": 2852
    },
    {
      "epoch": 1.0535450516986706,
      "grad_norm": 0.23804230988025665,
      "learning_rate": 0.00012984357679517183,
      "loss": 0.1993,
      "step": 2853
    },
    {
      "epoch": 1.0539143279172822,
      "grad_norm": 0.26753851771354675,
      "learning_rate": 0.00012981894321960832,
      "loss": 0.2218,
      "step": 2854
    },
    {
      "epoch": 1.0542836041358936,
      "grad_norm": 0.2615346610546112,
      "learning_rate": 0.00012979430964404483,
      "loss": 0.2618,
      "step": 2855
    },
    {
      "epoch": 1.0546528803545052,
      "grad_norm": 0.23570382595062256,
      "learning_rate": 0.00012976967606848135,
      "loss": 0.1975,
      "step": 2856
    },
    {
      "epoch": 1.0550221565731166,
      "grad_norm": 0.2610030770301819,
      "learning_rate": 0.00012974504249291786,
      "loss": 0.2375,
      "step": 2857
    },
    {
      "epoch": 1.0553914327917282,
      "grad_norm": 0.3134918808937073,
      "learning_rate": 0.00012972040891735435,
      "loss": 0.2299,
      "step": 2858
    },
    {
      "epoch": 1.0557607090103398,
      "grad_norm": 0.27121251821517944,
      "learning_rate": 0.00012969577534179086,
      "loss": 0.268,
      "step": 2859
    },
    {
      "epoch": 1.0561299852289512,
      "grad_norm": 0.26513317227363586,
      "learning_rate": 0.00012967114176622738,
      "loss": 0.219,
      "step": 2860
    },
    {
      "epoch": 1.0564992614475628,
      "grad_norm": 0.25374358892440796,
      "learning_rate": 0.0001296465081906639,
      "loss": 0.2188,
      "step": 2861
    },
    {
      "epoch": 1.0568685376661744,
      "grad_norm": 0.35821032524108887,
      "learning_rate": 0.00012962187461510038,
      "loss": 0.2754,
      "step": 2862
    },
    {
      "epoch": 1.0572378138847858,
      "grad_norm": 0.2845410108566284,
      "learning_rate": 0.0001295972410395369,
      "loss": 0.2178,
      "step": 2863
    },
    {
      "epoch": 1.0576070901033974,
      "grad_norm": 0.29702913761138916,
      "learning_rate": 0.00012957260746397338,
      "loss": 0.2715,
      "step": 2864
    },
    {
      "epoch": 1.0579763663220088,
      "grad_norm": 0.2890467047691345,
      "learning_rate": 0.00012954797388840992,
      "loss": 0.2827,
      "step": 2865
    },
    {
      "epoch": 1.0583456425406204,
      "grad_norm": 0.2700360417366028,
      "learning_rate": 0.0001295233403128464,
      "loss": 0.2362,
      "step": 2866
    },
    {
      "epoch": 1.058714918759232,
      "grad_norm": 0.25590065121650696,
      "learning_rate": 0.00012949870673728293,
      "loss": 0.2079,
      "step": 2867
    },
    {
      "epoch": 1.0590841949778433,
      "grad_norm": 0.3024281859397888,
      "learning_rate": 0.0001294740731617194,
      "loss": 0.2651,
      "step": 2868
    },
    {
      "epoch": 1.059453471196455,
      "grad_norm": 0.23602551221847534,
      "learning_rate": 0.00012944943958615593,
      "loss": 0.195,
      "step": 2869
    },
    {
      "epoch": 1.0598227474150665,
      "grad_norm": 0.2701203227043152,
      "learning_rate": 0.00012942480601059244,
      "loss": 0.253,
      "step": 2870
    },
    {
      "epoch": 1.060192023633678,
      "grad_norm": 0.3156161606311798,
      "learning_rate": 0.00012940017243502896,
      "loss": 0.2887,
      "step": 2871
    },
    {
      "epoch": 1.0605612998522895,
      "grad_norm": 0.21855607628822327,
      "learning_rate": 0.00012937553885946544,
      "loss": 0.1972,
      "step": 2872
    },
    {
      "epoch": 1.0609305760709011,
      "grad_norm": 0.3342536687850952,
      "learning_rate": 0.00012935090528390196,
      "loss": 0.2403,
      "step": 2873
    },
    {
      "epoch": 1.0612998522895125,
      "grad_norm": 0.24294506013393402,
      "learning_rate": 0.00012932627170833847,
      "loss": 0.234,
      "step": 2874
    },
    {
      "epoch": 1.0616691285081241,
      "grad_norm": 0.2701275050640106,
      "learning_rate": 0.000129301638132775,
      "loss": 0.2289,
      "step": 2875
    },
    {
      "epoch": 1.0620384047267355,
      "grad_norm": 0.2676856219768524,
      "learning_rate": 0.00012927700455721148,
      "loss": 0.2238,
      "step": 2876
    },
    {
      "epoch": 1.062407680945347,
      "grad_norm": 0.2500893473625183,
      "learning_rate": 0.000129252370981648,
      "loss": 0.2226,
      "step": 2877
    },
    {
      "epoch": 1.0627769571639587,
      "grad_norm": 0.2777494490146637,
      "learning_rate": 0.0001292277374060845,
      "loss": 0.2412,
      "step": 2878
    },
    {
      "epoch": 1.06314623338257,
      "grad_norm": 0.3415136933326721,
      "learning_rate": 0.00012920310383052102,
      "loss": 0.2569,
      "step": 2879
    },
    {
      "epoch": 1.0635155096011817,
      "grad_norm": 0.269741028547287,
      "learning_rate": 0.0001291784702549575,
      "loss": 0.2576,
      "step": 2880
    },
    {
      "epoch": 1.0638847858197933,
      "grad_norm": 0.2691381275653839,
      "learning_rate": 0.00012915383667939402,
      "loss": 0.2098,
      "step": 2881
    },
    {
      "epoch": 1.0642540620384047,
      "grad_norm": 0.257697194814682,
      "learning_rate": 0.0001291292031038305,
      "loss": 0.2526,
      "step": 2882
    },
    {
      "epoch": 1.0646233382570163,
      "grad_norm": 0.2562095820903778,
      "learning_rate": 0.00012910456952826705,
      "loss": 0.2164,
      "step": 2883
    },
    {
      "epoch": 1.0649926144756279,
      "grad_norm": 0.2850838899612427,
      "learning_rate": 0.00012907993595270354,
      "loss": 0.2251,
      "step": 2884
    },
    {
      "epoch": 1.0653618906942393,
      "grad_norm": 0.23420360684394836,
      "learning_rate": 0.00012905530237714005,
      "loss": 0.2164,
      "step": 2885
    },
    {
      "epoch": 1.0657311669128509,
      "grad_norm": 0.29589250683784485,
      "learning_rate": 0.00012903066880157654,
      "loss": 0.3105,
      "step": 2886
    },
    {
      "epoch": 1.0661004431314622,
      "grad_norm": 0.23718823492527008,
      "learning_rate": 0.00012900603522601306,
      "loss": 0.2262,
      "step": 2887
    },
    {
      "epoch": 1.0664697193500738,
      "grad_norm": 0.26658695936203003,
      "learning_rate": 0.00012898140165044957,
      "loss": 0.2385,
      "step": 2888
    },
    {
      "epoch": 1.0668389955686854,
      "grad_norm": 0.3809283673763275,
      "learning_rate": 0.00012895676807488609,
      "loss": 0.3091,
      "step": 2889
    },
    {
      "epoch": 1.0672082717872968,
      "grad_norm": 0.2975718080997467,
      "learning_rate": 0.00012893213449932257,
      "loss": 0.2426,
      "step": 2890
    },
    {
      "epoch": 1.0675775480059084,
      "grad_norm": 0.27787643671035767,
      "learning_rate": 0.0001289075009237591,
      "loss": 0.2769,
      "step": 2891
    },
    {
      "epoch": 1.06794682422452,
      "grad_norm": 0.27044934034347534,
      "learning_rate": 0.0001288828673481956,
      "loss": 0.2623,
      "step": 2892
    },
    {
      "epoch": 1.0683161004431314,
      "grad_norm": 0.24681483209133148,
      "learning_rate": 0.00012885823377263212,
      "loss": 0.2196,
      "step": 2893
    },
    {
      "epoch": 1.068685376661743,
      "grad_norm": 0.26946964859962463,
      "learning_rate": 0.0001288336001970686,
      "loss": 0.2263,
      "step": 2894
    },
    {
      "epoch": 1.0690546528803546,
      "grad_norm": 0.23332957923412323,
      "learning_rate": 0.00012880896662150512,
      "loss": 0.2418,
      "step": 2895
    },
    {
      "epoch": 1.069423929098966,
      "grad_norm": 0.28630056977272034,
      "learning_rate": 0.0001287843330459416,
      "loss": 0.2658,
      "step": 2896
    },
    {
      "epoch": 1.0697932053175776,
      "grad_norm": 0.3108094036579132,
      "learning_rate": 0.00012875969947037815,
      "loss": 0.2755,
      "step": 2897
    },
    {
      "epoch": 1.070162481536189,
      "grad_norm": 0.39538905024528503,
      "learning_rate": 0.00012873506589481464,
      "loss": 0.2336,
      "step": 2898
    },
    {
      "epoch": 1.0705317577548006,
      "grad_norm": 0.2604738771915436,
      "learning_rate": 0.00012871043231925115,
      "loss": 0.2361,
      "step": 2899
    },
    {
      "epoch": 1.0709010339734122,
      "grad_norm": 0.33082088828086853,
      "learning_rate": 0.00012868579874368764,
      "loss": 0.2508,
      "step": 2900
    },
    {
      "epoch": 1.0709010339734122,
      "eval_loss": 0.2767813801765442,
      "eval_runtime": 5.8576,
      "eval_samples_per_second": 8.536,
      "eval_steps_per_second": 1.195,
      "step": 2900
    },
    {
      "epoch": 1.0712703101920236,
      "grad_norm": 0.2974438965320587,
      "learning_rate": 0.00012866116516812415,
      "loss": 0.2323,
      "step": 2901
    },
    {
      "epoch": 1.0716395864106352,
      "grad_norm": 0.24254891276359558,
      "learning_rate": 0.00012863653159256067,
      "loss": 0.2219,
      "step": 2902
    },
    {
      "epoch": 1.0720088626292468,
      "grad_norm": 0.35016727447509766,
      "learning_rate": 0.00012861189801699718,
      "loss": 0.2304,
      "step": 2903
    },
    {
      "epoch": 1.0723781388478582,
      "grad_norm": 0.31191927194595337,
      "learning_rate": 0.00012858726444143367,
      "loss": 0.252,
      "step": 2904
    },
    {
      "epoch": 1.0727474150664698,
      "grad_norm": 0.25844889879226685,
      "learning_rate": 0.00012856263086587018,
      "loss": 0.26,
      "step": 2905
    },
    {
      "epoch": 1.0731166912850811,
      "grad_norm": 0.22783496975898743,
      "learning_rate": 0.0001285379972903067,
      "loss": 0.2091,
      "step": 2906
    },
    {
      "epoch": 1.0734859675036927,
      "grad_norm": 0.34771570563316345,
      "learning_rate": 0.0001285133637147432,
      "loss": 0.2972,
      "step": 2907
    },
    {
      "epoch": 1.0738552437223043,
      "grad_norm": 0.2750411033630371,
      "learning_rate": 0.0001284887301391797,
      "loss": 0.2472,
      "step": 2908
    },
    {
      "epoch": 1.0742245199409157,
      "grad_norm": 0.2285848706960678,
      "learning_rate": 0.00012846409656361622,
      "loss": 0.2443,
      "step": 2909
    },
    {
      "epoch": 1.0745937961595273,
      "grad_norm": 0.21732871234416962,
      "learning_rate": 0.00012843946298805273,
      "loss": 0.2002,
      "step": 2910
    },
    {
      "epoch": 1.074963072378139,
      "grad_norm": 0.2981227934360504,
      "learning_rate": 0.00012841482941248924,
      "loss": 0.2692,
      "step": 2911
    },
    {
      "epoch": 1.0753323485967503,
      "grad_norm": 0.24542368948459625,
      "learning_rate": 0.00012839019583692573,
      "loss": 0.2218,
      "step": 2912
    },
    {
      "epoch": 1.075701624815362,
      "grad_norm": 0.26330628991127014,
      "learning_rate": 0.00012836556226136225,
      "loss": 0.2446,
      "step": 2913
    },
    {
      "epoch": 1.0760709010339735,
      "grad_norm": 0.3054066300392151,
      "learning_rate": 0.00012834092868579873,
      "loss": 0.2813,
      "step": 2914
    },
    {
      "epoch": 1.076440177252585,
      "grad_norm": 0.2733544111251831,
      "learning_rate": 0.00012831629511023528,
      "loss": 0.2107,
      "step": 2915
    },
    {
      "epoch": 1.0768094534711965,
      "grad_norm": 0.28098398447036743,
      "learning_rate": 0.00012829166153467176,
      "loss": 0.2273,
      "step": 2916
    },
    {
      "epoch": 1.0771787296898079,
      "grad_norm": 0.3208807706832886,
      "learning_rate": 0.00012826702795910828,
      "loss": 0.2772,
      "step": 2917
    },
    {
      "epoch": 1.0775480059084195,
      "grad_norm": 0.26194900274276733,
      "learning_rate": 0.00012824239438354477,
      "loss": 0.2187,
      "step": 2918
    },
    {
      "epoch": 1.077917282127031,
      "grad_norm": 0.275849848985672,
      "learning_rate": 0.00012821776080798128,
      "loss": 0.2297,
      "step": 2919
    },
    {
      "epoch": 1.0782865583456425,
      "grad_norm": 0.24260565638542175,
      "learning_rate": 0.0001281931272324178,
      "loss": 0.2383,
      "step": 2920
    },
    {
      "epoch": 1.078655834564254,
      "grad_norm": 0.2160511314868927,
      "learning_rate": 0.0001281684936568543,
      "loss": 0.1778,
      "step": 2921
    },
    {
      "epoch": 1.0790251107828657,
      "grad_norm": 0.2878185212612152,
      "learning_rate": 0.0001281438600812908,
      "loss": 0.2639,
      "step": 2922
    },
    {
      "epoch": 1.079394387001477,
      "grad_norm": 0.2513464391231537,
      "learning_rate": 0.0001281192265057273,
      "loss": 0.2457,
      "step": 2923
    },
    {
      "epoch": 1.0797636632200887,
      "grad_norm": 0.28811386227607727,
      "learning_rate": 0.00012809459293016383,
      "loss": 0.2617,
      "step": 2924
    },
    {
      "epoch": 1.0801329394387,
      "grad_norm": 0.29851648211479187,
      "learning_rate": 0.00012806995935460034,
      "loss": 0.236,
      "step": 2925
    },
    {
      "epoch": 1.0805022156573116,
      "grad_norm": 0.27574729919433594,
      "learning_rate": 0.00012804532577903683,
      "loss": 0.2802,
      "step": 2926
    },
    {
      "epoch": 1.0808714918759232,
      "grad_norm": 0.23372849822044373,
      "learning_rate": 0.00012802069220347334,
      "loss": 0.203,
      "step": 2927
    },
    {
      "epoch": 1.0812407680945346,
      "grad_norm": 0.25483450293540955,
      "learning_rate": 0.00012799605862790983,
      "loss": 0.248,
      "step": 2928
    },
    {
      "epoch": 1.0816100443131462,
      "grad_norm": 0.25262442231178284,
      "learning_rate": 0.00012797142505234637,
      "loss": 0.2007,
      "step": 2929
    },
    {
      "epoch": 1.0819793205317578,
      "grad_norm": 0.2934408187866211,
      "learning_rate": 0.00012794679147678286,
      "loss": 0.2396,
      "step": 2930
    },
    {
      "epoch": 1.0823485967503692,
      "grad_norm": 0.2514856159687042,
      "learning_rate": 0.00012792215790121938,
      "loss": 0.2186,
      "step": 2931
    },
    {
      "epoch": 1.0827178729689808,
      "grad_norm": 0.27486154437065125,
      "learning_rate": 0.00012789752432565586,
      "loss": 0.2593,
      "step": 2932
    },
    {
      "epoch": 1.0830871491875924,
      "grad_norm": 0.2366592139005661,
      "learning_rate": 0.00012787289075009238,
      "loss": 0.2194,
      "step": 2933
    },
    {
      "epoch": 1.0834564254062038,
      "grad_norm": 0.25081318616867065,
      "learning_rate": 0.0001278482571745289,
      "loss": 0.2246,
      "step": 2934
    },
    {
      "epoch": 1.0838257016248154,
      "grad_norm": 0.2999029755592346,
      "learning_rate": 0.0001278236235989654,
      "loss": 0.2836,
      "step": 2935
    },
    {
      "epoch": 1.0841949778434268,
      "grad_norm": 0.23383331298828125,
      "learning_rate": 0.0001277989900234019,
      "loss": 0.2103,
      "step": 2936
    },
    {
      "epoch": 1.0845642540620384,
      "grad_norm": 0.23219743371009827,
      "learning_rate": 0.0001277743564478384,
      "loss": 0.2301,
      "step": 2937
    },
    {
      "epoch": 1.08493353028065,
      "grad_norm": 0.33842626214027405,
      "learning_rate": 0.00012774972287227492,
      "loss": 0.2801,
      "step": 2938
    },
    {
      "epoch": 1.0853028064992614,
      "grad_norm": 0.34319981932640076,
      "learning_rate": 0.00012772508929671144,
      "loss": 0.268,
      "step": 2939
    },
    {
      "epoch": 1.085672082717873,
      "grad_norm": 0.23391127586364746,
      "learning_rate": 0.00012770045572114793,
      "loss": 0.2201,
      "step": 2940
    },
    {
      "epoch": 1.0860413589364846,
      "grad_norm": 0.25276488065719604,
      "learning_rate": 0.00012767582214558444,
      "loss": 0.236,
      "step": 2941
    },
    {
      "epoch": 1.086410635155096,
      "grad_norm": 0.30007094144821167,
      "learning_rate": 0.00012765118857002093,
      "loss": 0.2987,
      "step": 2942
    },
    {
      "epoch": 1.0867799113737076,
      "grad_norm": 0.3073122799396515,
      "learning_rate": 0.00012762655499445747,
      "loss": 0.2702,
      "step": 2943
    },
    {
      "epoch": 1.0871491875923192,
      "grad_norm": 0.27124929428100586,
      "learning_rate": 0.00012760192141889396,
      "loss": 0.2549,
      "step": 2944
    },
    {
      "epoch": 1.0875184638109305,
      "grad_norm": 0.22957691550254822,
      "learning_rate": 0.00012757728784333047,
      "loss": 0.209,
      "step": 2945
    },
    {
      "epoch": 1.0878877400295421,
      "grad_norm": 0.26437053084373474,
      "learning_rate": 0.00012755265426776696,
      "loss": 0.2192,
      "step": 2946
    },
    {
      "epoch": 1.0882570162481535,
      "grad_norm": 0.21729683876037598,
      "learning_rate": 0.00012752802069220347,
      "loss": 0.219,
      "step": 2947
    },
    {
      "epoch": 1.0886262924667651,
      "grad_norm": 0.22185112535953522,
      "learning_rate": 0.00012750338711664,
      "loss": 0.187,
      "step": 2948
    },
    {
      "epoch": 1.0889955686853767,
      "grad_norm": 0.23401515185832977,
      "learning_rate": 0.0001274787535410765,
      "loss": 0.2367,
      "step": 2949
    },
    {
      "epoch": 1.089364844903988,
      "grad_norm": 0.2597702443599701,
      "learning_rate": 0.000127454119965513,
      "loss": 0.2198,
      "step": 2950
    },
    {
      "epoch": 1.089364844903988,
      "eval_loss": 0.2758151888847351,
      "eval_runtime": 5.8574,
      "eval_samples_per_second": 8.536,
      "eval_steps_per_second": 1.195,
      "step": 2950
    },
    {
      "epoch": 1.0897341211225997,
      "grad_norm": 0.23240283131599426,
      "learning_rate": 0.0001274294863899495,
      "loss": 0.1825,
      "step": 2951
    },
    {
      "epoch": 1.0901033973412113,
      "grad_norm": 0.24644871056079865,
      "learning_rate": 0.00012740485281438602,
      "loss": 0.235,
      "step": 2952
    },
    {
      "epoch": 1.0904726735598227,
      "grad_norm": 0.24943925440311432,
      "learning_rate": 0.00012738021923882253,
      "loss": 0.2302,
      "step": 2953
    },
    {
      "epoch": 1.0908419497784343,
      "grad_norm": 0.264442503452301,
      "learning_rate": 0.00012735558566325902,
      "loss": 0.2442,
      "step": 2954
    },
    {
      "epoch": 1.091211225997046,
      "grad_norm": 0.2990545928478241,
      "learning_rate": 0.00012733095208769554,
      "loss": 0.2634,
      "step": 2955
    },
    {
      "epoch": 1.0915805022156573,
      "grad_norm": 0.28452932834625244,
      "learning_rate": 0.00012730631851213205,
      "loss": 0.2528,
      "step": 2956
    },
    {
      "epoch": 1.0919497784342689,
      "grad_norm": 0.2825779914855957,
      "learning_rate": 0.00012728168493656857,
      "loss": 0.2432,
      "step": 2957
    },
    {
      "epoch": 1.0923190546528803,
      "grad_norm": 0.27615490555763245,
      "learning_rate": 0.00012725705136100505,
      "loss": 0.2471,
      "step": 2958
    },
    {
      "epoch": 1.0926883308714919,
      "grad_norm": 0.32125625014305115,
      "learning_rate": 0.00012723241778544157,
      "loss": 0.2899,
      "step": 2959
    },
    {
      "epoch": 1.0930576070901035,
      "grad_norm": 0.2847040593624115,
      "learning_rate": 0.00012720778420987806,
      "loss": 0.2388,
      "step": 2960
    },
    {
      "epoch": 1.0934268833087148,
      "grad_norm": 0.30398204922676086,
      "learning_rate": 0.0001271831506343146,
      "loss": 0.2625,
      "step": 2961
    },
    {
      "epoch": 1.0937961595273265,
      "grad_norm": 0.2955167591571808,
      "learning_rate": 0.00012715851705875108,
      "loss": 0.2384,
      "step": 2962
    },
    {
      "epoch": 1.094165435745938,
      "grad_norm": 0.33222588896751404,
      "learning_rate": 0.0001271338834831876,
      "loss": 0.2391,
      "step": 2963
    },
    {
      "epoch": 1.0945347119645494,
      "grad_norm": 0.27684515714645386,
      "learning_rate": 0.0001271092499076241,
      "loss": 0.2507,
      "step": 2964
    },
    {
      "epoch": 1.094903988183161,
      "grad_norm": 0.29779449105262756,
      "learning_rate": 0.0001270846163320606,
      "loss": 0.2067,
      "step": 2965
    },
    {
      "epoch": 1.0952732644017726,
      "grad_norm": 0.29084447026252747,
      "learning_rate": 0.00012705998275649712,
      "loss": 0.2458,
      "step": 2966
    },
    {
      "epoch": 1.095642540620384,
      "grad_norm": 0.23796658217906952,
      "learning_rate": 0.00012703534918093363,
      "loss": 0.2051,
      "step": 2967
    },
    {
      "epoch": 1.0960118168389956,
      "grad_norm": 0.3309081792831421,
      "learning_rate": 0.00012701071560537012,
      "loss": 0.2744,
      "step": 2968
    },
    {
      "epoch": 1.096381093057607,
      "grad_norm": 0.3095639944076538,
      "learning_rate": 0.00012698608202980663,
      "loss": 0.2843,
      "step": 2969
    },
    {
      "epoch": 1.0967503692762186,
      "grad_norm": 0.3181617558002472,
      "learning_rate": 0.00012696144845424315,
      "loss": 0.239,
      "step": 2970
    },
    {
      "epoch": 1.0971196454948302,
      "grad_norm": 0.23281103372573853,
      "learning_rate": 0.00012693681487867966,
      "loss": 0.216,
      "step": 2971
    },
    {
      "epoch": 1.0974889217134416,
      "grad_norm": 0.38004666566848755,
      "learning_rate": 0.00012691218130311615,
      "loss": 0.3023,
      "step": 2972
    },
    {
      "epoch": 1.0978581979320532,
      "grad_norm": 0.24479223787784576,
      "learning_rate": 0.00012688754772755266,
      "loss": 0.2248,
      "step": 2973
    },
    {
      "epoch": 1.0982274741506648,
      "grad_norm": 0.2372145652770996,
      "learning_rate": 0.00012686291415198915,
      "loss": 0.2228,
      "step": 2974
    },
    {
      "epoch": 1.0985967503692762,
      "grad_norm": 0.2958376407623291,
      "learning_rate": 0.0001268382805764257,
      "loss": 0.2777,
      "step": 2975
    },
    {
      "epoch": 1.0989660265878878,
      "grad_norm": 0.3016323447227478,
      "learning_rate": 0.00012681364700086218,
      "loss": 0.2612,
      "step": 2976
    },
    {
      "epoch": 1.0993353028064992,
      "grad_norm": 0.2763654887676239,
      "learning_rate": 0.0001267890134252987,
      "loss": 0.2851,
      "step": 2977
    },
    {
      "epoch": 1.0997045790251108,
      "grad_norm": 0.25004446506500244,
      "learning_rate": 0.00012676437984973518,
      "loss": 0.3329,
      "step": 2978
    },
    {
      "epoch": 1.1000738552437224,
      "grad_norm": 0.25913912057876587,
      "learning_rate": 0.0001267397462741717,
      "loss": 0.2259,
      "step": 2979
    },
    {
      "epoch": 1.1004431314623337,
      "grad_norm": 0.23761507868766785,
      "learning_rate": 0.0001267151126986082,
      "loss": 0.2219,
      "step": 2980
    },
    {
      "epoch": 1.1008124076809453,
      "grad_norm": 0.2535478174686432,
      "learning_rate": 0.00012669047912304473,
      "loss": 0.2359,
      "step": 2981
    },
    {
      "epoch": 1.101181683899557,
      "grad_norm": 0.3021339476108551,
      "learning_rate": 0.00012666584554748122,
      "loss": 0.2442,
      "step": 2982
    },
    {
      "epoch": 1.1015509601181683,
      "grad_norm": 0.27801018953323364,
      "learning_rate": 0.00012664121197191773,
      "loss": 0.2425,
      "step": 2983
    },
    {
      "epoch": 1.10192023633678,
      "grad_norm": 0.2920657992362976,
      "learning_rate": 0.00012661657839635424,
      "loss": 0.2554,
      "step": 2984
    },
    {
      "epoch": 1.1022895125553913,
      "grad_norm": 0.28303468227386475,
      "learning_rate": 0.00012659194482079076,
      "loss": 0.2404,
      "step": 2985
    },
    {
      "epoch": 1.102658788774003,
      "grad_norm": 0.2457839995622635,
      "learning_rate": 0.00012656731124522725,
      "loss": 0.2542,
      "step": 2986
    },
    {
      "epoch": 1.1030280649926145,
      "grad_norm": 0.25335025787353516,
      "learning_rate": 0.00012654267766966376,
      "loss": 0.2328,
      "step": 2987
    },
    {
      "epoch": 1.103397341211226,
      "grad_norm": 0.23906651139259338,
      "learning_rate": 0.00012651804409410028,
      "loss": 0.2178,
      "step": 2988
    },
    {
      "epoch": 1.1037666174298375,
      "grad_norm": 0.2866811752319336,
      "learning_rate": 0.0001264934105185368,
      "loss": 0.2498,
      "step": 2989
    },
    {
      "epoch": 1.104135893648449,
      "grad_norm": 0.2759389579296112,
      "learning_rate": 0.00012646877694297328,
      "loss": 0.2734,
      "step": 2990
    },
    {
      "epoch": 1.1045051698670605,
      "grad_norm": 0.3065519630908966,
      "learning_rate": 0.0001264441433674098,
      "loss": 0.2538,
      "step": 2991
    },
    {
      "epoch": 1.104874446085672,
      "grad_norm": 0.2824118435382843,
      "learning_rate": 0.00012641950979184628,
      "loss": 0.238,
      "step": 2992
    },
    {
      "epoch": 1.1052437223042837,
      "grad_norm": 0.32216399908065796,
      "learning_rate": 0.00012639487621628282,
      "loss": 0.2665,
      "step": 2993
    },
    {
      "epoch": 1.105612998522895,
      "grad_norm": 0.23793873190879822,
      "learning_rate": 0.0001263702426407193,
      "loss": 0.2157,
      "step": 2994
    },
    {
      "epoch": 1.1059822747415067,
      "grad_norm": 0.27395644783973694,
      "learning_rate": 0.00012634560906515582,
      "loss": 0.2214,
      "step": 2995
    },
    {
      "epoch": 1.106351550960118,
      "grad_norm": 0.23240530490875244,
      "learning_rate": 0.0001263209754895923,
      "loss": 0.2248,
      "step": 2996
    },
    {
      "epoch": 1.1067208271787297,
      "grad_norm": 0.2937382459640503,
      "learning_rate": 0.00012629634191402883,
      "loss": 0.2368,
      "step": 2997
    },
    {
      "epoch": 1.1070901033973413,
      "grad_norm": 0.34913092851638794,
      "learning_rate": 0.00012627170833846534,
      "loss": 0.2829,
      "step": 2998
    },
    {
      "epoch": 1.1074593796159526,
      "grad_norm": 0.2377249151468277,
      "learning_rate": 0.00012624707476290186,
      "loss": 0.1998,
      "step": 2999
    },
    {
      "epoch": 1.1078286558345642,
      "grad_norm": 0.321977823972702,
      "learning_rate": 0.00012622244118733834,
      "loss": 0.243,
      "step": 3000
    },
    {
      "epoch": 1.1078286558345642,
      "eval_loss": 0.2727716565132141,
      "eval_runtime": 5.8782,
      "eval_samples_per_second": 8.506,
      "eval_steps_per_second": 1.191,
      "step": 3000
    },
    {
      "epoch": 1.1081979320531758,
      "grad_norm": 0.2637842297554016,
      "learning_rate": 0.00012619780761177486,
      "loss": 0.2128,
      "step": 3001
    },
    {
      "epoch": 1.1085672082717872,
      "grad_norm": 0.24146443605422974,
      "learning_rate": 0.00012617317403621137,
      "loss": 0.2346,
      "step": 3002
    },
    {
      "epoch": 1.1089364844903988,
      "grad_norm": 0.3135768473148346,
      "learning_rate": 0.0001261485404606479,
      "loss": 0.2604,
      "step": 3003
    },
    {
      "epoch": 1.1093057607090104,
      "grad_norm": 0.24217776954174042,
      "learning_rate": 0.00012612390688508437,
      "loss": 0.1956,
      "step": 3004
    },
    {
      "epoch": 1.1096750369276218,
      "grad_norm": 0.280843049287796,
      "learning_rate": 0.0001260992733095209,
      "loss": 0.2541,
      "step": 3005
    },
    {
      "epoch": 1.1100443131462334,
      "grad_norm": 0.2333623617887497,
      "learning_rate": 0.00012607463973395738,
      "loss": 0.2471,
      "step": 3006
    },
    {
      "epoch": 1.1104135893648448,
      "grad_norm": 0.2542230784893036,
      "learning_rate": 0.00012605000615839392,
      "loss": 0.2015,
      "step": 3007
    },
    {
      "epoch": 1.1107828655834564,
      "grad_norm": 0.3016259968280792,
      "learning_rate": 0.0001260253725828304,
      "loss": 0.2298,
      "step": 3008
    },
    {
      "epoch": 1.111152141802068,
      "grad_norm": 0.28542524576187134,
      "learning_rate": 0.00012600073900726692,
      "loss": 0.2378,
      "step": 3009
    },
    {
      "epoch": 1.1115214180206794,
      "grad_norm": 0.29506421089172363,
      "learning_rate": 0.0001259761054317034,
      "loss": 0.2443,
      "step": 3010
    },
    {
      "epoch": 1.111890694239291,
      "grad_norm": 0.2600952088832855,
      "learning_rate": 0.00012595147185613992,
      "loss": 0.2239,
      "step": 3011
    },
    {
      "epoch": 1.1122599704579026,
      "grad_norm": 0.2315218448638916,
      "learning_rate": 0.00012592683828057644,
      "loss": 0.2431,
      "step": 3012
    },
    {
      "epoch": 1.112629246676514,
      "grad_norm": 0.26650896668434143,
      "learning_rate": 0.00012590220470501295,
      "loss": 0.1982,
      "step": 3013
    },
    {
      "epoch": 1.1129985228951256,
      "grad_norm": 0.29857608675956726,
      "learning_rate": 0.00012587757112944944,
      "loss": 0.2117,
      "step": 3014
    },
    {
      "epoch": 1.1133677991137372,
      "grad_norm": 0.35646194219589233,
      "learning_rate": 0.00012585293755388595,
      "loss": 0.3068,
      "step": 3015
    },
    {
      "epoch": 1.1137370753323486,
      "grad_norm": 0.28620585799217224,
      "learning_rate": 0.00012582830397832247,
      "loss": 0.2126,
      "step": 3016
    },
    {
      "epoch": 1.1141063515509602,
      "grad_norm": 0.28611883521080017,
      "learning_rate": 0.00012580367040275898,
      "loss": 0.2815,
      "step": 3017
    },
    {
      "epoch": 1.1144756277695715,
      "grad_norm": 0.2662048041820526,
      "learning_rate": 0.00012577903682719547,
      "loss": 0.2568,
      "step": 3018
    },
    {
      "epoch": 1.1148449039881831,
      "grad_norm": 0.2725636661052704,
      "learning_rate": 0.00012575440325163199,
      "loss": 0.2119,
      "step": 3019
    },
    {
      "epoch": 1.1152141802067947,
      "grad_norm": 0.24701742827892303,
      "learning_rate": 0.0001257297696760685,
      "loss": 0.2022,
      "step": 3020
    },
    {
      "epoch": 1.1155834564254061,
      "grad_norm": 0.25611329078674316,
      "learning_rate": 0.000125705136100505,
      "loss": 0.2262,
      "step": 3021
    },
    {
      "epoch": 1.1159527326440177,
      "grad_norm": 0.2516343593597412,
      "learning_rate": 0.0001256805025249415,
      "loss": 0.2067,
      "step": 3022
    },
    {
      "epoch": 1.1163220088626293,
      "grad_norm": 0.30316340923309326,
      "learning_rate": 0.000125655868949378,
      "loss": 0.272,
      "step": 3023
    },
    {
      "epoch": 1.1166912850812407,
      "grad_norm": 0.23739416897296906,
      "learning_rate": 0.0001256312353738145,
      "loss": 0.2276,
      "step": 3024
    },
    {
      "epoch": 1.1170605612998523,
      "grad_norm": 0.22895297408103943,
      "learning_rate": 0.00012560660179825102,
      "loss": 0.2076,
      "step": 3025
    },
    {
      "epoch": 1.117429837518464,
      "grad_norm": 0.23244740068912506,
      "learning_rate": 0.00012558196822268753,
      "loss": 0.2225,
      "step": 3026
    },
    {
      "epoch": 1.1177991137370753,
      "grad_norm": 0.2905164659023285,
      "learning_rate": 0.00012555733464712402,
      "loss": 0.2649,
      "step": 3027
    },
    {
      "epoch": 1.118168389955687,
      "grad_norm": 0.287203848361969,
      "learning_rate": 0.00012553270107156054,
      "loss": 0.2411,
      "step": 3028
    },
    {
      "epoch": 1.1185376661742983,
      "grad_norm": 0.21558254957199097,
      "learning_rate": 0.00012550806749599705,
      "loss": 0.1992,
      "step": 3029
    },
    {
      "epoch": 1.1189069423929099,
      "grad_norm": 0.23100394010543823,
      "learning_rate": 0.00012548343392043357,
      "loss": 0.2251,
      "step": 3030
    },
    {
      "epoch": 1.1192762186115215,
      "grad_norm": 0.2505006492137909,
      "learning_rate": 0.00012545880034487005,
      "loss": 0.2372,
      "step": 3031
    },
    {
      "epoch": 1.1196454948301329,
      "grad_norm": 0.2551506459712982,
      "learning_rate": 0.00012543416676930657,
      "loss": 0.212,
      "step": 3032
    },
    {
      "epoch": 1.1200147710487445,
      "grad_norm": 0.30516448616981506,
      "learning_rate": 0.00012540953319374306,
      "loss": 0.299,
      "step": 3033
    },
    {
      "epoch": 1.120384047267356,
      "grad_norm": 0.28968724608421326,
      "learning_rate": 0.0001253848996181796,
      "loss": 0.2338,
      "step": 3034
    },
    {
      "epoch": 1.1207533234859675,
      "grad_norm": 0.2573033273220062,
      "learning_rate": 0.00012536026604261608,
      "loss": 0.2352,
      "step": 3035
    },
    {
      "epoch": 1.121122599704579,
      "grad_norm": 0.26480695605278015,
      "learning_rate": 0.0001253356324670526,
      "loss": 0.2167,
      "step": 3036
    },
    {
      "epoch": 1.1214918759231907,
      "grad_norm": 0.2624709904193878,
      "learning_rate": 0.0001253109988914891,
      "loss": 0.2233,
      "step": 3037
    },
    {
      "epoch": 1.121861152141802,
      "grad_norm": 0.32714954018592834,
      "learning_rate": 0.0001252863653159256,
      "loss": 0.2943,
      "step": 3038
    },
    {
      "epoch": 1.1222304283604136,
      "grad_norm": 0.333815336227417,
      "learning_rate": 0.00012526173174036212,
      "loss": 0.2691,
      "step": 3039
    },
    {
      "epoch": 1.122599704579025,
      "grad_norm": 0.3042587637901306,
      "learning_rate": 0.00012523709816479863,
      "loss": 0.2645,
      "step": 3040
    },
    {
      "epoch": 1.1229689807976366,
      "grad_norm": 0.23870015144348145,
      "learning_rate": 0.00012521246458923512,
      "loss": 0.2264,
      "step": 3041
    },
    {
      "epoch": 1.1233382570162482,
      "grad_norm": 0.25744694471359253,
      "learning_rate": 0.00012518783101367163,
      "loss": 0.2359,
      "step": 3042
    },
    {
      "epoch": 1.1237075332348596,
      "grad_norm": 0.3124404549598694,
      "learning_rate": 0.00012516319743810815,
      "loss": 0.2354,
      "step": 3043
    },
    {
      "epoch": 1.1240768094534712,
      "grad_norm": 0.24731005728244781,
      "learning_rate": 0.00012513856386254466,
      "loss": 0.2177,
      "step": 3044
    },
    {
      "epoch": 1.1244460856720828,
      "grad_norm": 0.3020634353160858,
      "learning_rate": 0.00012511393028698115,
      "loss": 0.2779,
      "step": 3045
    },
    {
      "epoch": 1.1248153618906942,
      "grad_norm": 0.2442103922367096,
      "learning_rate": 0.00012508929671141766,
      "loss": 0.2165,
      "step": 3046
    },
    {
      "epoch": 1.1251846381093058,
      "grad_norm": 0.3287731409072876,
      "learning_rate": 0.00012506466313585418,
      "loss": 0.2607,
      "step": 3047
    },
    {
      "epoch": 1.1255539143279174,
      "grad_norm": 0.2367086112499237,
      "learning_rate": 0.0001250400295602907,
      "loss": 0.2101,
      "step": 3048
    },
    {
      "epoch": 1.1259231905465288,
      "grad_norm": 0.25181809067726135,
      "learning_rate": 0.00012501539598472718,
      "loss": 0.2225,
      "step": 3049
    },
    {
      "epoch": 1.1262924667651404,
      "grad_norm": 0.2873280644416809,
      "learning_rate": 0.0001249907624091637,
      "loss": 0.2248,
      "step": 3050
    },
    {
      "epoch": 1.1262924667651404,
      "eval_loss": 0.2710103690624237,
      "eval_runtime": 5.8492,
      "eval_samples_per_second": 8.548,
      "eval_steps_per_second": 1.197,
      "step": 3050
    },
    {
      "epoch": 1.1266617429837518,
      "grad_norm": 0.254673033952713,
      "learning_rate": 0.00012496612883360018,
      "loss": 0.24,
      "step": 3051
    },
    {
      "epoch": 1.1270310192023634,
      "grad_norm": 0.3037097454071045,
      "learning_rate": 0.00012494149525803672,
      "loss": 0.2916,
      "step": 3052
    },
    {
      "epoch": 1.127400295420975,
      "grad_norm": 0.23909220099449158,
      "learning_rate": 0.0001249168616824732,
      "loss": 0.2091,
      "step": 3053
    },
    {
      "epoch": 1.1277695716395864,
      "grad_norm": 0.3136500418186188,
      "learning_rate": 0.00012489222810690973,
      "loss": 0.272,
      "step": 3054
    },
    {
      "epoch": 1.128138847858198,
      "grad_norm": 0.2772284150123596,
      "learning_rate": 0.00012486759453134621,
      "loss": 0.2307,
      "step": 3055
    },
    {
      "epoch": 1.1285081240768093,
      "grad_norm": 0.25221678614616394,
      "learning_rate": 0.00012484296095578273,
      "loss": 0.2367,
      "step": 3056
    },
    {
      "epoch": 1.128877400295421,
      "grad_norm": 0.25936242938041687,
      "learning_rate": 0.00012481832738021924,
      "loss": 0.2589,
      "step": 3057
    },
    {
      "epoch": 1.1292466765140325,
      "grad_norm": 0.28311699628829956,
      "learning_rate": 0.00012479369380465576,
      "loss": 0.2371,
      "step": 3058
    },
    {
      "epoch": 1.129615952732644,
      "grad_norm": 0.3582422435283661,
      "learning_rate": 0.00012476906022909225,
      "loss": 0.2705,
      "step": 3059
    },
    {
      "epoch": 1.1299852289512555,
      "grad_norm": 0.2714608907699585,
      "learning_rate": 0.00012474442665352876,
      "loss": 0.2387,
      "step": 3060
    },
    {
      "epoch": 1.1303545051698671,
      "grad_norm": 0.3010917603969574,
      "learning_rate": 0.00012471979307796528,
      "loss": 0.2948,
      "step": 3061
    },
    {
      "epoch": 1.1307237813884785,
      "grad_norm": 0.308224618434906,
      "learning_rate": 0.0001246951595024018,
      "loss": 0.2862,
      "step": 3062
    },
    {
      "epoch": 1.1310930576070901,
      "grad_norm": 0.2517126798629761,
      "learning_rate": 0.00012467052592683828,
      "loss": 0.2661,
      "step": 3063
    },
    {
      "epoch": 1.1314623338257017,
      "grad_norm": 0.27293944358825684,
      "learning_rate": 0.0001246458923512748,
      "loss": 0.2047,
      "step": 3064
    },
    {
      "epoch": 1.131831610044313,
      "grad_norm": 0.2842094898223877,
      "learning_rate": 0.00012462125877571128,
      "loss": 0.2204,
      "step": 3065
    },
    {
      "epoch": 1.1322008862629247,
      "grad_norm": 0.25981253385543823,
      "learning_rate": 0.00012459662520014782,
      "loss": 0.2146,
      "step": 3066
    },
    {
      "epoch": 1.132570162481536,
      "grad_norm": 0.2736140191555023,
      "learning_rate": 0.0001245719916245843,
      "loss": 0.266,
      "step": 3067
    },
    {
      "epoch": 1.1329394387001477,
      "grad_norm": 0.35096457600593567,
      "learning_rate": 0.00012454735804902082,
      "loss": 0.2942,
      "step": 3068
    },
    {
      "epoch": 1.1333087149187593,
      "grad_norm": 0.26719680428504944,
      "learning_rate": 0.0001245227244734573,
      "loss": 0.2735,
      "step": 3069
    },
    {
      "epoch": 1.1336779911373707,
      "grad_norm": 0.2518094778060913,
      "learning_rate": 0.00012449809089789383,
      "loss": 0.2328,
      "step": 3070
    },
    {
      "epoch": 1.1340472673559823,
      "grad_norm": 0.30160000920295715,
      "learning_rate": 0.00012447345732233034,
      "loss": 0.2994,
      "step": 3071
    },
    {
      "epoch": 1.1344165435745939,
      "grad_norm": 0.24626970291137695,
      "learning_rate": 0.00012444882374676686,
      "loss": 0.2315,
      "step": 3072
    },
    {
      "epoch": 1.1347858197932053,
      "grad_norm": 0.25943630933761597,
      "learning_rate": 0.00012442419017120334,
      "loss": 0.2437,
      "step": 3073
    },
    {
      "epoch": 1.1351550960118169,
      "grad_norm": 0.21921169757843018,
      "learning_rate": 0.00012439955659563986,
      "loss": 0.2081,
      "step": 3074
    },
    {
      "epoch": 1.1355243722304285,
      "grad_norm": 0.26043611764907837,
      "learning_rate": 0.00012437492302007637,
      "loss": 0.2137,
      "step": 3075
    },
    {
      "epoch": 1.1358936484490398,
      "grad_norm": 0.26178666949272156,
      "learning_rate": 0.0001243502894445129,
      "loss": 0.2058,
      "step": 3076
    },
    {
      "epoch": 1.1362629246676514,
      "grad_norm": 0.3557722866535187,
      "learning_rate": 0.00012432565586894937,
      "loss": 0.2798,
      "step": 3077
    },
    {
      "epoch": 1.1366322008862628,
      "grad_norm": 0.3032893240451813,
      "learning_rate": 0.0001243010222933859,
      "loss": 0.2662,
      "step": 3078
    },
    {
      "epoch": 1.1370014771048744,
      "grad_norm": 0.25780877470970154,
      "learning_rate": 0.00012427638871782238,
      "loss": 0.2378,
      "step": 3079
    },
    {
      "epoch": 1.137370753323486,
      "grad_norm": 0.2566320598125458,
      "learning_rate": 0.00012425175514225892,
      "loss": 0.235,
      "step": 3080
    },
    {
      "epoch": 1.1377400295420974,
      "grad_norm": 0.3095914125442505,
      "learning_rate": 0.0001242271215666954,
      "loss": 0.2498,
      "step": 3081
    },
    {
      "epoch": 1.138109305760709,
      "grad_norm": 0.3422093689441681,
      "learning_rate": 0.00012420248799113192,
      "loss": 0.2715,
      "step": 3082
    },
    {
      "epoch": 1.1384785819793206,
      "grad_norm": 0.24209055304527283,
      "learning_rate": 0.0001241778544155684,
      "loss": 0.219,
      "step": 3083
    },
    {
      "epoch": 1.138847858197932,
      "grad_norm": 0.4152233898639679,
      "learning_rate": 0.00012415322084000492,
      "loss": 0.2235,
      "step": 3084
    },
    {
      "epoch": 1.1392171344165436,
      "grad_norm": 0.26387819647789,
      "learning_rate": 0.00012412858726444144,
      "loss": 0.2195,
      "step": 3085
    },
    {
      "epoch": 1.1395864106351552,
      "grad_norm": 0.251911997795105,
      "learning_rate": 0.00012410395368887795,
      "loss": 0.2147,
      "step": 3086
    },
    {
      "epoch": 1.1399556868537666,
      "grad_norm": 0.2334921807050705,
      "learning_rate": 0.00012407932011331444,
      "loss": 0.2033,
      "step": 3087
    },
    {
      "epoch": 1.1403249630723782,
      "grad_norm": 0.28513142466545105,
      "learning_rate": 0.00012405468653775095,
      "loss": 0.2355,
      "step": 3088
    },
    {
      "epoch": 1.1406942392909896,
      "grad_norm": 0.22765091061592102,
      "learning_rate": 0.00012403005296218747,
      "loss": 0.2028,
      "step": 3089
    },
    {
      "epoch": 1.1410635155096012,
      "grad_norm": 0.224373921751976,
      "learning_rate": 0.00012400541938662398,
      "loss": 0.1976,
      "step": 3090
    },
    {
      "epoch": 1.1414327917282128,
      "grad_norm": 0.3062220811843872,
      "learning_rate": 0.00012398078581106047,
      "loss": 0.2535,
      "step": 3091
    },
    {
      "epoch": 1.1418020679468242,
      "grad_norm": 0.259894996881485,
      "learning_rate": 0.00012395615223549699,
      "loss": 0.2289,
      "step": 3092
    },
    {
      "epoch": 1.1421713441654358,
      "grad_norm": 0.24251849949359894,
      "learning_rate": 0.0001239315186599335,
      "loss": 0.2256,
      "step": 3093
    },
    {
      "epoch": 1.1425406203840474,
      "grad_norm": 0.27573704719543457,
      "learning_rate": 0.00012390688508437001,
      "loss": 0.2466,
      "step": 3094
    },
    {
      "epoch": 1.1429098966026587,
      "grad_norm": 0.29267674684524536,
      "learning_rate": 0.0001238822515088065,
      "loss": 0.2606,
      "step": 3095
    },
    {
      "epoch": 1.1432791728212703,
      "grad_norm": 0.253482848405838,
      "learning_rate": 0.00012385761793324302,
      "loss": 0.2301,
      "step": 3096
    },
    {
      "epoch": 1.143648449039882,
      "grad_norm": 0.2747405469417572,
      "learning_rate": 0.0001238329843576795,
      "loss": 0.2733,
      "step": 3097
    },
    {
      "epoch": 1.1440177252584933,
      "grad_norm": 0.26296141743659973,
      "learning_rate": 0.00012380835078211605,
      "loss": 0.2231,
      "step": 3098
    },
    {
      "epoch": 1.144387001477105,
      "grad_norm": 0.3350454568862915,
      "learning_rate": 0.00012378371720655253,
      "loss": 0.2461,
      "step": 3099
    },
    {
      "epoch": 1.1447562776957163,
      "grad_norm": 0.2649227976799011,
      "learning_rate": 0.00012375908363098905,
      "loss": 0.2802,
      "step": 3100
    },
    {
      "epoch": 1.1447562776957163,
      "eval_loss": 0.2746792435646057,
      "eval_runtime": 5.8569,
      "eval_samples_per_second": 8.537,
      "eval_steps_per_second": 1.195,
      "step": 3100
    },
    {
      "epoch": 1.145125553914328,
      "grad_norm": 0.34254997968673706,
      "learning_rate": 0.00012373445005542554,
      "loss": 0.2205,
      "step": 3101
    },
    {
      "epoch": 1.1454948301329395,
      "grad_norm": 0.29163575172424316,
      "learning_rate": 0.00012370981647986205,
      "loss": 0.2629,
      "step": 3102
    },
    {
      "epoch": 1.145864106351551,
      "grad_norm": 0.2712344527244568,
      "learning_rate": 0.00012368518290429856,
      "loss": 0.2303,
      "step": 3103
    },
    {
      "epoch": 1.1462333825701625,
      "grad_norm": 0.27596956491470337,
      "learning_rate": 0.00012366054932873508,
      "loss": 0.2243,
      "step": 3104
    },
    {
      "epoch": 1.146602658788774,
      "grad_norm": 0.253072589635849,
      "learning_rate": 0.00012363591575317157,
      "loss": 0.2265,
      "step": 3105
    },
    {
      "epoch": 1.1469719350073855,
      "grad_norm": 0.2831348776817322,
      "learning_rate": 0.00012361128217760808,
      "loss": 0.2431,
      "step": 3106
    },
    {
      "epoch": 1.147341211225997,
      "grad_norm": 0.25114157795906067,
      "learning_rate": 0.0001235866486020446,
      "loss": 0.2225,
      "step": 3107
    },
    {
      "epoch": 1.1477104874446087,
      "grad_norm": 0.25578370690345764,
      "learning_rate": 0.0001235620150264811,
      "loss": 0.2463,
      "step": 3108
    },
    {
      "epoch": 1.14807976366322,
      "grad_norm": 0.2710251212120056,
      "learning_rate": 0.0001235373814509176,
      "loss": 0.1833,
      "step": 3109
    },
    {
      "epoch": 1.1484490398818317,
      "grad_norm": 0.26096826791763306,
      "learning_rate": 0.0001235127478753541,
      "loss": 0.2136,
      "step": 3110
    },
    {
      "epoch": 1.148818316100443,
      "grad_norm": 0.30772802233695984,
      "learning_rate": 0.0001234881142997906,
      "loss": 0.2743,
      "step": 3111
    },
    {
      "epoch": 1.1491875923190547,
      "grad_norm": 0.33691519498825073,
      "learning_rate": 0.00012346348072422714,
      "loss": 0.3105,
      "step": 3112
    },
    {
      "epoch": 1.1495568685376663,
      "grad_norm": 0.2449200302362442,
      "learning_rate": 0.00012343884714866363,
      "loss": 0.2234,
      "step": 3113
    },
    {
      "epoch": 1.1499261447562776,
      "grad_norm": 0.29358598589897156,
      "learning_rate": 0.00012341421357310014,
      "loss": 0.2595,
      "step": 3114
    },
    {
      "epoch": 1.1502954209748892,
      "grad_norm": 0.3605344295501709,
      "learning_rate": 0.00012338957999753663,
      "loss": 0.3003,
      "step": 3115
    },
    {
      "epoch": 1.1506646971935006,
      "grad_norm": 0.2726616859436035,
      "learning_rate": 0.00012336494642197315,
      "loss": 0.2966,
      "step": 3116
    },
    {
      "epoch": 1.1510339734121122,
      "grad_norm": 0.28332284092903137,
      "learning_rate": 0.00012334031284640966,
      "loss": 0.2501,
      "step": 3117
    },
    {
      "epoch": 1.1514032496307238,
      "grad_norm": 0.2538849711418152,
      "learning_rate": 0.00012331567927084618,
      "loss": 0.2295,
      "step": 3118
    },
    {
      "epoch": 1.1517725258493354,
      "grad_norm": 0.24772146344184875,
      "learning_rate": 0.00012329104569528266,
      "loss": 0.2019,
      "step": 3119
    },
    {
      "epoch": 1.1521418020679468,
      "grad_norm": 0.24297967553138733,
      "learning_rate": 0.00012326641211971918,
      "loss": 0.2096,
      "step": 3120
    },
    {
      "epoch": 1.1525110782865584,
      "grad_norm": 0.2873733341693878,
      "learning_rate": 0.0001232417785441557,
      "loss": 0.2034,
      "step": 3121
    },
    {
      "epoch": 1.1528803545051698,
      "grad_norm": 0.2223925143480301,
      "learning_rate": 0.0001232171449685922,
      "loss": 0.2291,
      "step": 3122
    },
    {
      "epoch": 1.1532496307237814,
      "grad_norm": 0.2619134187698364,
      "learning_rate": 0.0001231925113930287,
      "loss": 0.2475,
      "step": 3123
    },
    {
      "epoch": 1.153618906942393,
      "grad_norm": 0.29636234045028687,
      "learning_rate": 0.0001231678778174652,
      "loss": 0.2267,
      "step": 3124
    },
    {
      "epoch": 1.1539881831610044,
      "grad_norm": 0.233274906873703,
      "learning_rate": 0.00012314324424190172,
      "loss": 0.2162,
      "step": 3125
    },
    {
      "epoch": 1.154357459379616,
      "grad_norm": 0.2567541301250458,
      "learning_rate": 0.00012311861066633824,
      "loss": 0.2105,
      "step": 3126
    },
    {
      "epoch": 1.1547267355982274,
      "grad_norm": 0.28177598118782043,
      "learning_rate": 0.00012309397709077473,
      "loss": 0.2777,
      "step": 3127
    },
    {
      "epoch": 1.155096011816839,
      "grad_norm": 0.295654296875,
      "learning_rate": 0.00012306934351521124,
      "loss": 0.2765,
      "step": 3128
    },
    {
      "epoch": 1.1554652880354506,
      "grad_norm": 0.2781972587108612,
      "learning_rate": 0.00012304470993964773,
      "loss": 0.2207,
      "step": 3129
    },
    {
      "epoch": 1.155834564254062,
      "grad_norm": 0.3001963794231415,
      "learning_rate": 0.00012302007636408427,
      "loss": 0.3032,
      "step": 3130
    },
    {
      "epoch": 1.1562038404726735,
      "grad_norm": 0.2637580633163452,
      "learning_rate": 0.00012299544278852076,
      "loss": 0.2143,
      "step": 3131
    },
    {
      "epoch": 1.1565731166912852,
      "grad_norm": 0.2356804609298706,
      "learning_rate": 0.00012297080921295727,
      "loss": 0.2345,
      "step": 3132
    },
    {
      "epoch": 1.1569423929098965,
      "grad_norm": 0.250374972820282,
      "learning_rate": 0.00012294617563739376,
      "loss": 0.2176,
      "step": 3133
    },
    {
      "epoch": 1.1573116691285081,
      "grad_norm": 0.3240339159965515,
      "learning_rate": 0.00012292154206183027,
      "loss": 0.251,
      "step": 3134
    },
    {
      "epoch": 1.1576809453471197,
      "grad_norm": 0.3229665756225586,
      "learning_rate": 0.0001228969084862668,
      "loss": 0.2673,
      "step": 3135
    },
    {
      "epoch": 1.1580502215657311,
      "grad_norm": 0.3183058202266693,
      "learning_rate": 0.0001228722749107033,
      "loss": 0.27,
      "step": 3136
    },
    {
      "epoch": 1.1584194977843427,
      "grad_norm": 0.26968470215797424,
      "learning_rate": 0.0001228476413351398,
      "loss": 0.2282,
      "step": 3137
    },
    {
      "epoch": 1.158788774002954,
      "grad_norm": 0.3959614634513855,
      "learning_rate": 0.0001228230077595763,
      "loss": 0.3621,
      "step": 3138
    },
    {
      "epoch": 1.1591580502215657,
      "grad_norm": 0.28337931632995605,
      "learning_rate": 0.00012279837418401282,
      "loss": 0.2816,
      "step": 3139
    },
    {
      "epoch": 1.1595273264401773,
      "grad_norm": 0.3202999234199524,
      "learning_rate": 0.00012277374060844934,
      "loss": 0.2623,
      "step": 3140
    },
    {
      "epoch": 1.1598966026587887,
      "grad_norm": 0.30495762825012207,
      "learning_rate": 0.00012274910703288582,
      "loss": 0.2255,
      "step": 3141
    },
    {
      "epoch": 1.1602658788774003,
      "grad_norm": 0.3435508906841278,
      "learning_rate": 0.00012272447345732234,
      "loss": 0.268,
      "step": 3142
    },
    {
      "epoch": 1.160635155096012,
      "grad_norm": 0.2825329303741455,
      "learning_rate": 0.00012269983988175883,
      "loss": 0.2358,
      "step": 3143
    },
    {
      "epoch": 1.1610044313146233,
      "grad_norm": 0.2530229091644287,
      "learning_rate": 0.00012267520630619537,
      "loss": 0.2061,
      "step": 3144
    },
    {
      "epoch": 1.1613737075332349,
      "grad_norm": 0.27512991428375244,
      "learning_rate": 0.00012265057273063185,
      "loss": 0.2342,
      "step": 3145
    },
    {
      "epoch": 1.1617429837518465,
      "grad_norm": 0.26814547181129456,
      "learning_rate": 0.00012262593915506837,
      "loss": 0.256,
      "step": 3146
    },
    {
      "epoch": 1.1621122599704579,
      "grad_norm": 0.2830052971839905,
      "learning_rate": 0.00012260130557950486,
      "loss": 0.2467,
      "step": 3147
    },
    {
      "epoch": 1.1624815361890695,
      "grad_norm": 0.29037684202194214,
      "learning_rate": 0.00012257667200394137,
      "loss": 0.2384,
      "step": 3148
    },
    {
      "epoch": 1.1628508124076808,
      "grad_norm": 0.2737725079059601,
      "learning_rate": 0.00012255203842837789,
      "loss": 0.2288,
      "step": 3149
    },
    {
      "epoch": 1.1632200886262924,
      "grad_norm": 0.28996843099594116,
      "learning_rate": 0.0001225274048528144,
      "loss": 0.2843,
      "step": 3150
    },
    {
      "epoch": 1.1632200886262924,
      "eval_loss": 0.2770858108997345,
      "eval_runtime": 5.8638,
      "eval_samples_per_second": 8.527,
      "eval_steps_per_second": 1.194,
      "step": 3150
    },
    {
      "epoch": 1.163589364844904,
      "grad_norm": 0.261055052280426,
      "learning_rate": 0.0001225027712772509,
      "loss": 0.2761,
      "step": 3151
    },
    {
      "epoch": 1.1639586410635154,
      "grad_norm": 0.2340794801712036,
      "learning_rate": 0.0001224781377016874,
      "loss": 0.2253,
      "step": 3152
    },
    {
      "epoch": 1.164327917282127,
      "grad_norm": 0.31548061966896057,
      "learning_rate": 0.00012245350412612392,
      "loss": 0.2828,
      "step": 3153
    },
    {
      "epoch": 1.1646971935007386,
      "grad_norm": 0.32488757371902466,
      "learning_rate": 0.00012242887055056043,
      "loss": 0.2747,
      "step": 3154
    },
    {
      "epoch": 1.16506646971935,
      "grad_norm": 0.2568996250629425,
      "learning_rate": 0.00012240423697499692,
      "loss": 0.2313,
      "step": 3155
    },
    {
      "epoch": 1.1654357459379616,
      "grad_norm": 0.23020611703395844,
      "learning_rate": 0.00012237960339943343,
      "loss": 0.2156,
      "step": 3156
    },
    {
      "epoch": 1.1658050221565732,
      "grad_norm": 0.23148971796035767,
      "learning_rate": 0.00012235496982386995,
      "loss": 0.2153,
      "step": 3157
    },
    {
      "epoch": 1.1661742983751846,
      "grad_norm": 0.3115021288394928,
      "learning_rate": 0.00012233033624830646,
      "loss": 0.2635,
      "step": 3158
    },
    {
      "epoch": 1.1665435745937962,
      "grad_norm": 0.2591547966003418,
      "learning_rate": 0.00012230570267274295,
      "loss": 0.2355,
      "step": 3159
    },
    {
      "epoch": 1.1669128508124076,
      "grad_norm": 0.2312883585691452,
      "learning_rate": 0.00012228106909717947,
      "loss": 0.2208,
      "step": 3160
    },
    {
      "epoch": 1.1672821270310192,
      "grad_norm": 0.26675599813461304,
      "learning_rate": 0.00012225643552161595,
      "loss": 0.2463,
      "step": 3161
    },
    {
      "epoch": 1.1676514032496308,
      "grad_norm": 0.26281824707984924,
      "learning_rate": 0.0001222318019460525,
      "loss": 0.2415,
      "step": 3162
    },
    {
      "epoch": 1.1680206794682422,
      "grad_norm": 0.24059298634529114,
      "learning_rate": 0.00012220716837048898,
      "loss": 0.2339,
      "step": 3163
    },
    {
      "epoch": 1.1683899556868538,
      "grad_norm": 0.2939622700214386,
      "learning_rate": 0.0001221825347949255,
      "loss": 0.2549,
      "step": 3164
    },
    {
      "epoch": 1.1687592319054654,
      "grad_norm": 0.2592087686061859,
      "learning_rate": 0.00012215790121936198,
      "loss": 0.239,
      "step": 3165
    },
    {
      "epoch": 1.1691285081240768,
      "grad_norm": 0.26341572403907776,
      "learning_rate": 0.0001221332676437985,
      "loss": 0.2254,
      "step": 3166
    },
    {
      "epoch": 1.1694977843426884,
      "grad_norm": 0.26745542883872986,
      "learning_rate": 0.00012210863406823501,
      "loss": 0.2166,
      "step": 3167
    },
    {
      "epoch": 1.1698670605613,
      "grad_norm": 0.26928338408470154,
      "learning_rate": 0.00012208400049267153,
      "loss": 0.2149,
      "step": 3168
    },
    {
      "epoch": 1.1702363367799113,
      "grad_norm": 0.2942584156990051,
      "learning_rate": 0.00012205936691710802,
      "loss": 0.2064,
      "step": 3169
    },
    {
      "epoch": 1.170605612998523,
      "grad_norm": 0.3437819480895996,
      "learning_rate": 0.00012203473334154454,
      "loss": 0.2328,
      "step": 3170
    },
    {
      "epoch": 1.1709748892171343,
      "grad_norm": 0.2597808837890625,
      "learning_rate": 0.00012201009976598103,
      "loss": 0.2175,
      "step": 3171
    },
    {
      "epoch": 1.171344165435746,
      "grad_norm": 0.27388855814933777,
      "learning_rate": 0.00012198546619041755,
      "loss": 0.263,
      "step": 3172
    },
    {
      "epoch": 1.1717134416543575,
      "grad_norm": 0.23952798545360565,
      "learning_rate": 0.00012196083261485405,
      "loss": 0.2097,
      "step": 3173
    },
    {
      "epoch": 1.172082717872969,
      "grad_norm": 0.2443709820508957,
      "learning_rate": 0.00012193619903929056,
      "loss": 0.1985,
      "step": 3174
    },
    {
      "epoch": 1.1724519940915805,
      "grad_norm": 0.2822941541671753,
      "learning_rate": 0.00012191156546372706,
      "loss": 0.244,
      "step": 3175
    },
    {
      "epoch": 1.172821270310192,
      "grad_norm": 0.24943234026432037,
      "learning_rate": 0.00012188693188816358,
      "loss": 0.2032,
      "step": 3176
    },
    {
      "epoch": 1.1731905465288035,
      "grad_norm": 0.2702721953392029,
      "learning_rate": 0.00012186229831260008,
      "loss": 0.2123,
      "step": 3177
    },
    {
      "epoch": 1.173559822747415,
      "grad_norm": 0.2794484794139862,
      "learning_rate": 0.0001218376647370366,
      "loss": 0.2593,
      "step": 3178
    },
    {
      "epoch": 1.1739290989660267,
      "grad_norm": 0.2570948600769043,
      "learning_rate": 0.0001218130311614731,
      "loss": 0.1915,
      "step": 3179
    },
    {
      "epoch": 1.174298375184638,
      "grad_norm": 0.24439947307109833,
      "learning_rate": 0.00012178839758590961,
      "loss": 0.2451,
      "step": 3180
    },
    {
      "epoch": 1.1746676514032497,
      "grad_norm": 0.2643444836139679,
      "learning_rate": 0.00012176376401034611,
      "loss": 0.2129,
      "step": 3181
    },
    {
      "epoch": 1.175036927621861,
      "grad_norm": 0.30255910754203796,
      "learning_rate": 0.00012173913043478263,
      "loss": 0.2637,
      "step": 3182
    },
    {
      "epoch": 1.1754062038404727,
      "grad_norm": 0.3298236131668091,
      "learning_rate": 0.00012171449685921911,
      "loss": 0.2776,
      "step": 3183
    },
    {
      "epoch": 1.1757754800590843,
      "grad_norm": 0.25277990102767944,
      "learning_rate": 0.00012168986328365564,
      "loss": 0.2088,
      "step": 3184
    },
    {
      "epoch": 1.1761447562776957,
      "grad_norm": 0.2743963599205017,
      "learning_rate": 0.00012166522970809213,
      "loss": 0.2579,
      "step": 3185
    },
    {
      "epoch": 1.1765140324963073,
      "grad_norm": 0.26288869976997375,
      "learning_rate": 0.00012164059613252866,
      "loss": 0.2181,
      "step": 3186
    },
    {
      "epoch": 1.1768833087149186,
      "grad_norm": 0.26495224237442017,
      "learning_rate": 0.00012161596255696514,
      "loss": 0.2185,
      "step": 3187
    },
    {
      "epoch": 1.1772525849335302,
      "grad_norm": 0.29768168926239014,
      "learning_rate": 0.00012159132898140166,
      "loss": 0.2642,
      "step": 3188
    },
    {
      "epoch": 1.1776218611521418,
      "grad_norm": 0.2437969595193863,
      "learning_rate": 0.00012156669540583816,
      "loss": 0.2102,
      "step": 3189
    },
    {
      "epoch": 1.1779911373707532,
      "grad_norm": 0.26859331130981445,
      "learning_rate": 0.00012154206183027467,
      "loss": 0.2545,
      "step": 3190
    },
    {
      "epoch": 1.1783604135893648,
      "grad_norm": 0.2850019335746765,
      "learning_rate": 0.00012151742825471118,
      "loss": 0.2372,
      "step": 3191
    },
    {
      "epoch": 1.1787296898079764,
      "grad_norm": 0.24644847214221954,
      "learning_rate": 0.00012149279467914769,
      "loss": 0.2193,
      "step": 3192
    },
    {
      "epoch": 1.1790989660265878,
      "grad_norm": 0.2813577950000763,
      "learning_rate": 0.00012146816110358419,
      "loss": 0.2636,
      "step": 3193
    },
    {
      "epoch": 1.1794682422451994,
      "grad_norm": 0.34138408303260803,
      "learning_rate": 0.0001214435275280207,
      "loss": 0.3041,
      "step": 3194
    },
    {
      "epoch": 1.179837518463811,
      "grad_norm": 0.29984021186828613,
      "learning_rate": 0.00012141889395245721,
      "loss": 0.2123,
      "step": 3195
    },
    {
      "epoch": 1.1802067946824224,
      "grad_norm": 0.23603621125221252,
      "learning_rate": 0.00012139426037689372,
      "loss": 0.2095,
      "step": 3196
    },
    {
      "epoch": 1.180576070901034,
      "grad_norm": 0.26498091220855713,
      "learning_rate": 0.00012136962680133022,
      "loss": 0.2647,
      "step": 3197
    },
    {
      "epoch": 1.1809453471196454,
      "grad_norm": 0.282332181930542,
      "learning_rate": 0.00012134499322576674,
      "loss": 0.2369,
      "step": 3198
    },
    {
      "epoch": 1.181314623338257,
      "grad_norm": 0.2596491575241089,
      "learning_rate": 0.00012132035965020322,
      "loss": 0.2263,
      "step": 3199
    },
    {
      "epoch": 1.1816838995568686,
      "grad_norm": 0.2347802370786667,
      "learning_rate": 0.00012129572607463975,
      "loss": 0.2103,
      "step": 3200
    },
    {
      "epoch": 1.1816838995568686,
      "eval_loss": 0.27301788330078125,
      "eval_runtime": 5.8695,
      "eval_samples_per_second": 8.519,
      "eval_steps_per_second": 1.193,
      "step": 3200
    },
    {
      "epoch": 1.18205317577548,
      "grad_norm": 0.25488463044166565,
      "learning_rate": 0.00012127109249907624,
      "loss": 0.2309,
      "step": 3201
    },
    {
      "epoch": 1.1824224519940916,
      "grad_norm": 0.2890549600124359,
      "learning_rate": 0.00012124645892351277,
      "loss": 0.2486,
      "step": 3202
    },
    {
      "epoch": 1.1827917282127032,
      "grad_norm": 0.30259156227111816,
      "learning_rate": 0.00012122182534794926,
      "loss": 0.2189,
      "step": 3203
    },
    {
      "epoch": 1.1831610044313146,
      "grad_norm": 0.23023875057697296,
      "learning_rate": 0.00012119719177238577,
      "loss": 0.2202,
      "step": 3204
    },
    {
      "epoch": 1.1835302806499262,
      "grad_norm": 0.2631145119667053,
      "learning_rate": 0.00012117255819682227,
      "loss": 0.2351,
      "step": 3205
    },
    {
      "epoch": 1.1838995568685378,
      "grad_norm": 0.2502356469631195,
      "learning_rate": 0.00012114792462125879,
      "loss": 0.2403,
      "step": 3206
    },
    {
      "epoch": 1.1842688330871491,
      "grad_norm": 0.25082942843437195,
      "learning_rate": 0.00012112329104569529,
      "loss": 0.2001,
      "step": 3207
    },
    {
      "epoch": 1.1846381093057607,
      "grad_norm": 0.3217408359050751,
      "learning_rate": 0.0001210986574701318,
      "loss": 0.2396,
      "step": 3208
    },
    {
      "epoch": 1.1850073855243721,
      "grad_norm": 0.2907535135746002,
      "learning_rate": 0.0001210740238945683,
      "loss": 0.2348,
      "step": 3209
    },
    {
      "epoch": 1.1853766617429837,
      "grad_norm": 0.2987769842147827,
      "learning_rate": 0.00012104939031900482,
      "loss": 0.2543,
      "step": 3210
    },
    {
      "epoch": 1.1857459379615953,
      "grad_norm": 0.2710782289505005,
      "learning_rate": 0.00012102475674344132,
      "loss": 0.2465,
      "step": 3211
    },
    {
      "epoch": 1.1861152141802067,
      "grad_norm": 0.2633363902568817,
      "learning_rate": 0.00012100012316787783,
      "loss": 0.2367,
      "step": 3212
    },
    {
      "epoch": 1.1864844903988183,
      "grad_norm": 0.23605535924434662,
      "learning_rate": 0.00012097548959231434,
      "loss": 0.2265,
      "step": 3213
    },
    {
      "epoch": 1.18685376661743,
      "grad_norm": 0.3763718008995056,
      "learning_rate": 0.00012095085601675085,
      "loss": 0.2352,
      "step": 3214
    },
    {
      "epoch": 1.1872230428360413,
      "grad_norm": 0.2694814205169678,
      "learning_rate": 0.00012092622244118734,
      "loss": 0.2123,
      "step": 3215
    },
    {
      "epoch": 1.187592319054653,
      "grad_norm": 0.23994052410125732,
      "learning_rate": 0.00012090158886562387,
      "loss": 0.2079,
      "step": 3216
    },
    {
      "epoch": 1.1879615952732645,
      "grad_norm": 0.3044906258583069,
      "learning_rate": 0.00012087695529006035,
      "loss": 0.2245,
      "step": 3217
    },
    {
      "epoch": 1.1883308714918759,
      "grad_norm": 0.2753120958805084,
      "learning_rate": 0.00012085232171449688,
      "loss": 0.2345,
      "step": 3218
    },
    {
      "epoch": 1.1887001477104875,
      "grad_norm": 0.28482258319854736,
      "learning_rate": 0.00012082768813893337,
      "loss": 0.2458,
      "step": 3219
    },
    {
      "epoch": 1.1890694239290989,
      "grad_norm": 0.2305278182029724,
      "learning_rate": 0.00012080305456336988,
      "loss": 0.2336,
      "step": 3220
    },
    {
      "epoch": 1.1894387001477105,
      "grad_norm": 0.2535829246044159,
      "learning_rate": 0.00012077842098780638,
      "loss": 0.2363,
      "step": 3221
    },
    {
      "epoch": 1.189807976366322,
      "grad_norm": 0.2498999387025833,
      "learning_rate": 0.0001207537874122429,
      "loss": 0.2378,
      "step": 3222
    },
    {
      "epoch": 1.1901772525849335,
      "grad_norm": 0.305387943983078,
      "learning_rate": 0.0001207291538366794,
      "loss": 0.248,
      "step": 3223
    },
    {
      "epoch": 1.190546528803545,
      "grad_norm": 0.27258002758026123,
      "learning_rate": 0.00012070452026111591,
      "loss": 0.2297,
      "step": 3224
    },
    {
      "epoch": 1.1909158050221567,
      "grad_norm": 0.25035011768341064,
      "learning_rate": 0.00012067988668555242,
      "loss": 0.2447,
      "step": 3225
    },
    {
      "epoch": 1.191285081240768,
      "grad_norm": 0.26438868045806885,
      "learning_rate": 0.00012065525310998893,
      "loss": 0.1997,
      "step": 3226
    },
    {
      "epoch": 1.1916543574593796,
      "grad_norm": 0.3002438247203827,
      "learning_rate": 0.00012063061953442543,
      "loss": 0.3065,
      "step": 3227
    },
    {
      "epoch": 1.1920236336779912,
      "grad_norm": 0.3621065020561218,
      "learning_rate": 0.00012060598595886195,
      "loss": 0.2116,
      "step": 3228
    },
    {
      "epoch": 1.1923929098966026,
      "grad_norm": 0.25311121344566345,
      "learning_rate": 0.00012058135238329843,
      "loss": 0.2062,
      "step": 3229
    },
    {
      "epoch": 1.1927621861152142,
      "grad_norm": 0.21173089742660522,
      "learning_rate": 0.00012055671880773496,
      "loss": 0.1927,
      "step": 3230
    },
    {
      "epoch": 1.1931314623338256,
      "grad_norm": 0.2535032331943512,
      "learning_rate": 0.00012053208523217145,
      "loss": 0.2252,
      "step": 3231
    },
    {
      "epoch": 1.1935007385524372,
      "grad_norm": 0.3750533163547516,
      "learning_rate": 0.00012050745165660798,
      "loss": 0.2191,
      "step": 3232
    },
    {
      "epoch": 1.1938700147710488,
      "grad_norm": 0.2945975363254547,
      "learning_rate": 0.00012048281808104447,
      "loss": 0.2761,
      "step": 3233
    },
    {
      "epoch": 1.1942392909896602,
      "grad_norm": 0.25776052474975586,
      "learning_rate": 0.00012045818450548098,
      "loss": 0.2611,
      "step": 3234
    },
    {
      "epoch": 1.1946085672082718,
      "grad_norm": 0.21198132634162903,
      "learning_rate": 0.00012043355092991748,
      "loss": 0.2043,
      "step": 3235
    },
    {
      "epoch": 1.1949778434268834,
      "grad_norm": 0.24576450884342194,
      "learning_rate": 0.000120408917354354,
      "loss": 0.2492,
      "step": 3236
    },
    {
      "epoch": 1.1953471196454948,
      "grad_norm": 0.27762892842292786,
      "learning_rate": 0.0001203842837787905,
      "loss": 0.2776,
      "step": 3237
    },
    {
      "epoch": 1.1957163958641064,
      "grad_norm": 0.24789641797542572,
      "learning_rate": 0.00012035965020322701,
      "loss": 0.2199,
      "step": 3238
    },
    {
      "epoch": 1.196085672082718,
      "grad_norm": 0.24140557646751404,
      "learning_rate": 0.00012033501662766351,
      "loss": 0.2301,
      "step": 3239
    },
    {
      "epoch": 1.1964549483013294,
      "grad_norm": 0.24686704576015472,
      "learning_rate": 0.00012031038305210003,
      "loss": 0.2119,
      "step": 3240
    },
    {
      "epoch": 1.196824224519941,
      "grad_norm": 0.23982328176498413,
      "learning_rate": 0.00012028574947653653,
      "loss": 0.2204,
      "step": 3241
    },
    {
      "epoch": 1.1971935007385524,
      "grad_norm": 0.24495044350624084,
      "learning_rate": 0.00012026111590097304,
      "loss": 0.2129,
      "step": 3242
    },
    {
      "epoch": 1.197562776957164,
      "grad_norm": 0.23792871832847595,
      "learning_rate": 0.00012023648232540954,
      "loss": 0.2325,
      "step": 3243
    },
    {
      "epoch": 1.1979320531757756,
      "grad_norm": 0.2640572190284729,
      "learning_rate": 0.00012021184874984606,
      "loss": 0.211,
      "step": 3244
    },
    {
      "epoch": 1.198301329394387,
      "grad_norm": 0.28885048627853394,
      "learning_rate": 0.00012018721517428255,
      "loss": 0.2044,
      "step": 3245
    },
    {
      "epoch": 1.1986706056129985,
      "grad_norm": 0.23012898862361908,
      "learning_rate": 0.00012016258159871907,
      "loss": 0.2272,
      "step": 3246
    },
    {
      "epoch": 1.19903988183161,
      "grad_norm": 0.26595887541770935,
      "learning_rate": 0.00012013794802315556,
      "loss": 0.2221,
      "step": 3247
    },
    {
      "epoch": 1.1994091580502215,
      "grad_norm": 0.3253090977668762,
      "learning_rate": 0.00012011331444759209,
      "loss": 0.2502,
      "step": 3248
    },
    {
      "epoch": 1.1997784342688331,
      "grad_norm": 0.26922520995140076,
      "learning_rate": 0.00012008868087202858,
      "loss": 0.2376,
      "step": 3249
    },
    {
      "epoch": 1.2001477104874447,
      "grad_norm": 0.2791667878627777,
      "learning_rate": 0.00012006404729646509,
      "loss": 0.2025,
      "step": 3250
    },
    {
      "epoch": 1.2001477104874447,
      "eval_loss": 0.27150794863700867,
      "eval_runtime": 5.8659,
      "eval_samples_per_second": 8.524,
      "eval_steps_per_second": 1.193,
      "step": 3250
    },
    {
      "epoch": 1.200516986706056,
      "grad_norm": 0.2856389880180359,
      "learning_rate": 0.00012003941372090159,
      "loss": 0.2313,
      "step": 3251
    },
    {
      "epoch": 1.2008862629246677,
      "grad_norm": 0.2949007749557495,
      "learning_rate": 0.00012001478014533811,
      "loss": 0.2517,
      "step": 3252
    },
    {
      "epoch": 1.201255539143279,
      "grad_norm": 0.214605912566185,
      "learning_rate": 0.00011999014656977461,
      "loss": 0.2241,
      "step": 3253
    },
    {
      "epoch": 1.2016248153618907,
      "grad_norm": 0.22103802859783173,
      "learning_rate": 0.00011996551299421111,
      "loss": 0.2072,
      "step": 3254
    },
    {
      "epoch": 1.2019940915805023,
      "grad_norm": 0.25153616070747375,
      "learning_rate": 0.00011994087941864762,
      "loss": 0.215,
      "step": 3255
    },
    {
      "epoch": 1.2023633677991137,
      "grad_norm": 0.38636547327041626,
      "learning_rate": 0.00011991624584308411,
      "loss": 0.2629,
      "step": 3256
    },
    {
      "epoch": 1.2027326440177253,
      "grad_norm": 0.2714768648147583,
      "learning_rate": 0.00011989161226752064,
      "loss": 0.2468,
      "step": 3257
    },
    {
      "epoch": 1.2031019202363367,
      "grad_norm": 0.29080840945243835,
      "learning_rate": 0.00011986697869195713,
      "loss": 0.2382,
      "step": 3258
    },
    {
      "epoch": 1.2034711964549483,
      "grad_norm": 0.31261980533599854,
      "learning_rate": 0.00011984234511639366,
      "loss": 0.2995,
      "step": 3259
    },
    {
      "epoch": 1.2038404726735599,
      "grad_norm": 0.2760385572910309,
      "learning_rate": 0.00011981771154083014,
      "loss": 0.2316,
      "step": 3260
    },
    {
      "epoch": 1.2042097488921713,
      "grad_norm": 0.3133525848388672,
      "learning_rate": 0.00011979307796526666,
      "loss": 0.2405,
      "step": 3261
    },
    {
      "epoch": 1.2045790251107829,
      "grad_norm": 0.2454364150762558,
      "learning_rate": 0.00011976844438970316,
      "loss": 0.2457,
      "step": 3262
    },
    {
      "epoch": 1.2049483013293945,
      "grad_norm": 0.22785277664661407,
      "learning_rate": 0.00011974381081413967,
      "loss": 0.1819,
      "step": 3263
    },
    {
      "epoch": 1.2053175775480058,
      "grad_norm": 0.25301745533943176,
      "learning_rate": 0.00011971917723857618,
      "loss": 0.2416,
      "step": 3264
    },
    {
      "epoch": 1.2056868537666174,
      "grad_norm": 0.25902584195137024,
      "learning_rate": 0.00011969454366301269,
      "loss": 0.2255,
      "step": 3265
    },
    {
      "epoch": 1.206056129985229,
      "grad_norm": 0.2496347427368164,
      "learning_rate": 0.00011966991008744919,
      "loss": 0.2418,
      "step": 3266
    },
    {
      "epoch": 1.2064254062038404,
      "grad_norm": 0.3066750466823578,
      "learning_rate": 0.0001196452765118857,
      "loss": 0.284,
      "step": 3267
    },
    {
      "epoch": 1.206794682422452,
      "grad_norm": 0.28269433975219727,
      "learning_rate": 0.0001196206429363222,
      "loss": 0.2428,
      "step": 3268
    },
    {
      "epoch": 1.2071639586410634,
      "grad_norm": 0.2589324712753296,
      "learning_rate": 0.00011959600936075872,
      "loss": 0.2433,
      "step": 3269
    },
    {
      "epoch": 1.207533234859675,
      "grad_norm": 0.2615012228488922,
      "learning_rate": 0.00011957137578519522,
      "loss": 0.2566,
      "step": 3270
    },
    {
      "epoch": 1.2079025110782866,
      "grad_norm": 0.26669344305992126,
      "learning_rate": 0.00011954674220963174,
      "loss": 0.2573,
      "step": 3271
    },
    {
      "epoch": 1.208271787296898,
      "grad_norm": 0.24091607332229614,
      "learning_rate": 0.00011952210863406822,
      "loss": 0.2455,
      "step": 3272
    },
    {
      "epoch": 1.2086410635155096,
      "grad_norm": 0.2977018654346466,
      "learning_rate": 0.00011949747505850475,
      "loss": 0.2662,
      "step": 3273
    },
    {
      "epoch": 1.2090103397341212,
      "grad_norm": 0.3145335912704468,
      "learning_rate": 0.00011947284148294124,
      "loss": 0.2715,
      "step": 3274
    },
    {
      "epoch": 1.2093796159527326,
      "grad_norm": 0.2826317846775055,
      "learning_rate": 0.00011944820790737777,
      "loss": 0.3068,
      "step": 3275
    },
    {
      "epoch": 1.2097488921713442,
      "grad_norm": 0.3221205174922943,
      "learning_rate": 0.00011942357433181426,
      "loss": 0.2602,
      "step": 3276
    },
    {
      "epoch": 1.2101181683899558,
      "grad_norm": 0.33315688371658325,
      "learning_rate": 0.00011939894075625077,
      "loss": 0.3002,
      "step": 3277
    },
    {
      "epoch": 1.2104874446085672,
      "grad_norm": 0.2790661156177521,
      "learning_rate": 0.00011937430718068727,
      "loss": 0.2621,
      "step": 3278
    },
    {
      "epoch": 1.2108567208271788,
      "grad_norm": 0.27199870347976685,
      "learning_rate": 0.00011934967360512379,
      "loss": 0.244,
      "step": 3279
    },
    {
      "epoch": 1.2112259970457901,
      "grad_norm": 0.2701851427555084,
      "learning_rate": 0.00011932504002956029,
      "loss": 0.2147,
      "step": 3280
    },
    {
      "epoch": 1.2115952732644018,
      "grad_norm": 0.35070064663887024,
      "learning_rate": 0.0001193004064539968,
      "loss": 0.2465,
      "step": 3281
    },
    {
      "epoch": 1.2119645494830134,
      "grad_norm": 0.5654131770133972,
      "learning_rate": 0.0001192757728784333,
      "loss": 0.2967,
      "step": 3282
    },
    {
      "epoch": 1.2123338257016247,
      "grad_norm": 0.267313688993454,
      "learning_rate": 0.00011925113930286982,
      "loss": 0.2194,
      "step": 3283
    },
    {
      "epoch": 1.2127031019202363,
      "grad_norm": 0.2560022175312042,
      "learning_rate": 0.00011922650572730632,
      "loss": 0.2502,
      "step": 3284
    },
    {
      "epoch": 1.213072378138848,
      "grad_norm": 0.21603523194789886,
      "learning_rate": 0.00011920187215174283,
      "loss": 0.1819,
      "step": 3285
    },
    {
      "epoch": 1.2134416543574593,
      "grad_norm": 0.32343846559524536,
      "learning_rate": 0.00011917723857617933,
      "loss": 0.2671,
      "step": 3286
    },
    {
      "epoch": 1.213810930576071,
      "grad_norm": 0.3197554647922516,
      "learning_rate": 0.00011915260500061585,
      "loss": 0.2421,
      "step": 3287
    },
    {
      "epoch": 1.2141802067946825,
      "grad_norm": 0.2887294292449951,
      "learning_rate": 0.00011912797142505234,
      "loss": 0.2321,
      "step": 3288
    },
    {
      "epoch": 1.214549483013294,
      "grad_norm": 0.2647448182106018,
      "learning_rate": 0.00011910333784948886,
      "loss": 0.2153,
      "step": 3289
    },
    {
      "epoch": 1.2149187592319055,
      "grad_norm": 0.31124141812324524,
      "learning_rate": 0.00011907870427392535,
      "loss": 0.2645,
      "step": 3290
    },
    {
      "epoch": 1.215288035450517,
      "grad_norm": 0.28668105602264404,
      "learning_rate": 0.00011905407069836188,
      "loss": 0.2719,
      "step": 3291
    },
    {
      "epoch": 1.2156573116691285,
      "grad_norm": 0.2660035789012909,
      "learning_rate": 0.00011902943712279837,
      "loss": 0.2273,
      "step": 3292
    },
    {
      "epoch": 1.21602658788774,
      "grad_norm": 0.2613721787929535,
      "learning_rate": 0.00011900480354723488,
      "loss": 0.2473,
      "step": 3293
    },
    {
      "epoch": 1.2163958641063515,
      "grad_norm": 0.29387205839157104,
      "learning_rate": 0.00011898016997167138,
      "loss": 0.2348,
      "step": 3294
    },
    {
      "epoch": 1.216765140324963,
      "grad_norm": 0.3106662929058075,
      "learning_rate": 0.0001189555363961079,
      "loss": 0.2437,
      "step": 3295
    },
    {
      "epoch": 1.2171344165435747,
      "grad_norm": 0.26709648966789246,
      "learning_rate": 0.0001189309028205444,
      "loss": 0.2829,
      "step": 3296
    },
    {
      "epoch": 1.217503692762186,
      "grad_norm": 0.338077187538147,
      "learning_rate": 0.00011890626924498091,
      "loss": 0.2754,
      "step": 3297
    },
    {
      "epoch": 1.2178729689807977,
      "grad_norm": 0.23778629302978516,
      "learning_rate": 0.00011888163566941742,
      "loss": 0.2257,
      "step": 3298
    },
    {
      "epoch": 1.2182422451994093,
      "grad_norm": 0.23242737352848053,
      "learning_rate": 0.00011885700209385393,
      "loss": 0.236,
      "step": 3299
    },
    {
      "epoch": 1.2186115214180206,
      "grad_norm": 0.2840418517589569,
      "learning_rate": 0.00011883236851829043,
      "loss": 0.254,
      "step": 3300
    },
    {
      "epoch": 1.2186115214180206,
      "eval_loss": 0.271670937538147,
      "eval_runtime": 5.8605,
      "eval_samples_per_second": 8.532,
      "eval_steps_per_second": 1.194,
      "step": 3300
    },
    {
      "epoch": 1.2189807976366323,
      "grad_norm": 0.26939278841018677,
      "learning_rate": 0.00011880773494272695,
      "loss": 0.2356,
      "step": 3301
    },
    {
      "epoch": 1.2193500738552436,
      "grad_norm": 0.2695137560367584,
      "learning_rate": 0.00011878310136716345,
      "loss": 0.2563,
      "step": 3302
    },
    {
      "epoch": 1.2197193500738552,
      "grad_norm": 0.30331453680992126,
      "learning_rate": 0.00011875846779159996,
      "loss": 0.2666,
      "step": 3303
    },
    {
      "epoch": 1.2200886262924668,
      "grad_norm": 0.2266516387462616,
      "learning_rate": 0.00011873383421603645,
      "loss": 0.2413,
      "step": 3304
    },
    {
      "epoch": 1.2204579025110782,
      "grad_norm": 0.314443439245224,
      "learning_rate": 0.00011870920064047298,
      "loss": 0.2487,
      "step": 3305
    },
    {
      "epoch": 1.2208271787296898,
      "grad_norm": 0.24367497861385345,
      "learning_rate": 0.00011868456706490946,
      "loss": 0.242,
      "step": 3306
    },
    {
      "epoch": 1.2211964549483014,
      "grad_norm": 0.2384032905101776,
      "learning_rate": 0.00011865993348934599,
      "loss": 0.2482,
      "step": 3307
    },
    {
      "epoch": 1.2215657311669128,
      "grad_norm": 0.28105780482292175,
      "learning_rate": 0.00011863529991378248,
      "loss": 0.2306,
      "step": 3308
    },
    {
      "epoch": 1.2219350073855244,
      "grad_norm": 0.2741890549659729,
      "learning_rate": 0.000118610666338219,
      "loss": 0.2582,
      "step": 3309
    },
    {
      "epoch": 1.222304283604136,
      "grad_norm": 0.28679463267326355,
      "learning_rate": 0.0001185860327626555,
      "loss": 0.2671,
      "step": 3310
    },
    {
      "epoch": 1.2226735598227474,
      "grad_norm": 0.24229590594768524,
      "learning_rate": 0.00011856139918709201,
      "loss": 0.207,
      "step": 3311
    },
    {
      "epoch": 1.223042836041359,
      "grad_norm": 0.27817606925964355,
      "learning_rate": 0.00011853676561152851,
      "loss": 0.2764,
      "step": 3312
    },
    {
      "epoch": 1.2234121122599704,
      "grad_norm": 0.2285379320383072,
      "learning_rate": 0.00011851213203596503,
      "loss": 0.2185,
      "step": 3313
    },
    {
      "epoch": 1.223781388478582,
      "grad_norm": 0.2450941950082779,
      "learning_rate": 0.00011848749846040153,
      "loss": 0.2631,
      "step": 3314
    },
    {
      "epoch": 1.2241506646971936,
      "grad_norm": 0.24236904084682465,
      "learning_rate": 0.00011846286488483804,
      "loss": 0.1996,
      "step": 3315
    },
    {
      "epoch": 1.224519940915805,
      "grad_norm": 0.26536843180656433,
      "learning_rate": 0.00011843823130927454,
      "loss": 0.2282,
      "step": 3316
    },
    {
      "epoch": 1.2248892171344166,
      "grad_norm": 0.31366321444511414,
      "learning_rate": 0.00011841359773371106,
      "loss": 0.2292,
      "step": 3317
    },
    {
      "epoch": 1.225258493353028,
      "grad_norm": 0.21941806375980377,
      "learning_rate": 0.00011838896415814756,
      "loss": 0.1989,
      "step": 3318
    },
    {
      "epoch": 1.2256277695716395,
      "grad_norm": 0.2817480266094208,
      "learning_rate": 0.00011836433058258407,
      "loss": 0.2342,
      "step": 3319
    },
    {
      "epoch": 1.2259970457902511,
      "grad_norm": 0.3172677755355835,
      "learning_rate": 0.00011833969700702056,
      "loss": 0.2157,
      "step": 3320
    },
    {
      "epoch": 1.2263663220088628,
      "grad_norm": 0.2760121822357178,
      "learning_rate": 0.00011831506343145709,
      "loss": 0.223,
      "step": 3321
    },
    {
      "epoch": 1.2267355982274741,
      "grad_norm": 0.29548752307891846,
      "learning_rate": 0.00011829042985589358,
      "loss": 0.2042,
      "step": 3322
    },
    {
      "epoch": 1.2271048744460857,
      "grad_norm": 0.3663739860057831,
      "learning_rate": 0.0001182657962803301,
      "loss": 0.2965,
      "step": 3323
    },
    {
      "epoch": 1.2274741506646971,
      "grad_norm": 0.3228389024734497,
      "learning_rate": 0.00011824116270476659,
      "loss": 0.2863,
      "step": 3324
    },
    {
      "epoch": 1.2278434268833087,
      "grad_norm": 0.41276058554649353,
      "learning_rate": 0.00011821652912920311,
      "loss": 0.257,
      "step": 3325
    },
    {
      "epoch": 1.2282127031019203,
      "grad_norm": 0.2709147334098816,
      "learning_rate": 0.00011819189555363961,
      "loss": 0.2395,
      "step": 3326
    },
    {
      "epoch": 1.2285819793205317,
      "grad_norm": 0.29278793931007385,
      "learning_rate": 0.00011816726197807612,
      "loss": 0.249,
      "step": 3327
    },
    {
      "epoch": 1.2289512555391433,
      "grad_norm": 0.23940005898475647,
      "learning_rate": 0.00011814262840251262,
      "loss": 0.2156,
      "step": 3328
    },
    {
      "epoch": 1.2293205317577547,
      "grad_norm": 0.2927228808403015,
      "learning_rate": 0.00011811799482694914,
      "loss": 0.2468,
      "step": 3329
    },
    {
      "epoch": 1.2296898079763663,
      "grad_norm": 0.28324705362319946,
      "learning_rate": 0.00011809336125138564,
      "loss": 0.2062,
      "step": 3330
    },
    {
      "epoch": 1.230059084194978,
      "grad_norm": 0.2211814522743225,
      "learning_rate": 0.00011806872767582215,
      "loss": 0.2028,
      "step": 3331
    },
    {
      "epoch": 1.2304283604135893,
      "grad_norm": 0.31836825609207153,
      "learning_rate": 0.00011804409410025866,
      "loss": 0.285,
      "step": 3332
    },
    {
      "epoch": 1.2307976366322009,
      "grad_norm": 0.29719287157058716,
      "learning_rate": 0.00011801946052469517,
      "loss": 0.2157,
      "step": 3333
    },
    {
      "epoch": 1.2311669128508125,
      "grad_norm": 0.2482568621635437,
      "learning_rate": 0.00011799482694913167,
      "loss": 0.2333,
      "step": 3334
    },
    {
      "epoch": 1.2315361890694239,
      "grad_norm": 0.27771538496017456,
      "learning_rate": 0.00011797019337356819,
      "loss": 0.2354,
      "step": 3335
    },
    {
      "epoch": 1.2319054652880355,
      "grad_norm": 0.2799612879753113,
      "learning_rate": 0.00011794555979800467,
      "loss": 0.2475,
      "step": 3336
    },
    {
      "epoch": 1.232274741506647,
      "grad_norm": 0.24728535115718842,
      "learning_rate": 0.0001179209262224412,
      "loss": 0.231,
      "step": 3337
    },
    {
      "epoch": 1.2326440177252584,
      "grad_norm": 0.24407118558883667,
      "learning_rate": 0.00011789629264687769,
      "loss": 0.2125,
      "step": 3338
    },
    {
      "epoch": 1.23301329394387,
      "grad_norm": 0.34859201312065125,
      "learning_rate": 0.00011787165907131422,
      "loss": 0.2651,
      "step": 3339
    },
    {
      "epoch": 1.2333825701624814,
      "grad_norm": 0.2828972041606903,
      "learning_rate": 0.0001178470254957507,
      "loss": 0.2557,
      "step": 3340
    },
    {
      "epoch": 1.233751846381093,
      "grad_norm": 0.3028653860092163,
      "learning_rate": 0.00011782239192018722,
      "loss": 0.2545,
      "step": 3341
    },
    {
      "epoch": 1.2341211225997046,
      "grad_norm": 0.2722667157649994,
      "learning_rate": 0.00011779775834462372,
      "loss": 0.2406,
      "step": 3342
    },
    {
      "epoch": 1.234490398818316,
      "grad_norm": 0.28357529640197754,
      "learning_rate": 0.00011777312476906024,
      "loss": 0.2486,
      "step": 3343
    },
    {
      "epoch": 1.2348596750369276,
      "grad_norm": 0.24250741302967072,
      "learning_rate": 0.00011774849119349674,
      "loss": 0.2506,
      "step": 3344
    },
    {
      "epoch": 1.2352289512555392,
      "grad_norm": 0.24236734211444855,
      "learning_rate": 0.00011772385761793325,
      "loss": 0.2218,
      "step": 3345
    },
    {
      "epoch": 1.2355982274741506,
      "grad_norm": 0.5239112377166748,
      "learning_rate": 0.00011769922404236975,
      "loss": 0.3046,
      "step": 3346
    },
    {
      "epoch": 1.2359675036927622,
      "grad_norm": 0.253016859292984,
      "learning_rate": 0.00011767459046680627,
      "loss": 0.2395,
      "step": 3347
    },
    {
      "epoch": 1.2363367799113738,
      "grad_norm": 0.29107823967933655,
      "learning_rate": 0.00011764995689124277,
      "loss": 0.2733,
      "step": 3348
    },
    {
      "epoch": 1.2367060561299852,
      "grad_norm": 0.28030651807785034,
      "learning_rate": 0.00011762532331567928,
      "loss": 0.2579,
      "step": 3349
    },
    {
      "epoch": 1.2370753323485968,
      "grad_norm": 0.2902437448501587,
      "learning_rate": 0.00011760068974011578,
      "loss": 0.2443,
      "step": 3350
    },
    {
      "epoch": 1.2370753323485968,
      "eval_loss": 0.2730793058872223,
      "eval_runtime": 5.8659,
      "eval_samples_per_second": 8.524,
      "eval_steps_per_second": 1.193,
      "step": 3350
    },
    {
      "epoch": 1.2374446085672082,
      "grad_norm": 0.29294252395629883,
      "learning_rate": 0.0001175760561645523,
      "loss": 0.2696,
      "step": 3351
    },
    {
      "epoch": 1.2378138847858198,
      "grad_norm": 0.25074857473373413,
      "learning_rate": 0.00011755142258898879,
      "loss": 0.216,
      "step": 3352
    },
    {
      "epoch": 1.2381831610044314,
      "grad_norm": 0.2616208791732788,
      "learning_rate": 0.00011752678901342531,
      "loss": 0.2676,
      "step": 3353
    },
    {
      "epoch": 1.2385524372230428,
      "grad_norm": 0.28293970227241516,
      "learning_rate": 0.0001175021554378618,
      "loss": 0.2803,
      "step": 3354
    },
    {
      "epoch": 1.2389217134416544,
      "grad_norm": 0.24231275916099548,
      "learning_rate": 0.00011747752186229833,
      "loss": 0.2366,
      "step": 3355
    },
    {
      "epoch": 1.239290989660266,
      "grad_norm": 0.217429518699646,
      "learning_rate": 0.00011745288828673482,
      "loss": 0.209,
      "step": 3356
    },
    {
      "epoch": 1.2396602658788773,
      "grad_norm": 0.2831771969795227,
      "learning_rate": 0.00011742825471117133,
      "loss": 0.2674,
      "step": 3357
    },
    {
      "epoch": 1.240029542097489,
      "grad_norm": 0.24386093020439148,
      "learning_rate": 0.00011740362113560783,
      "loss": 0.2052,
      "step": 3358
    },
    {
      "epoch": 1.2403988183161005,
      "grad_norm": 0.2491457760334015,
      "learning_rate": 0.00011737898756004435,
      "loss": 0.2344,
      "step": 3359
    },
    {
      "epoch": 1.240768094534712,
      "grad_norm": 0.22436252236366272,
      "learning_rate": 0.00011735435398448085,
      "loss": 0.1829,
      "step": 3360
    },
    {
      "epoch": 1.2411373707533235,
      "grad_norm": 0.2288627028465271,
      "learning_rate": 0.00011732972040891736,
      "loss": 0.2106,
      "step": 3361
    },
    {
      "epoch": 1.241506646971935,
      "grad_norm": 0.35576456785202026,
      "learning_rate": 0.00011730508683335386,
      "loss": 0.2411,
      "step": 3362
    },
    {
      "epoch": 1.2418759231905465,
      "grad_norm": 0.271321177482605,
      "learning_rate": 0.00011728045325779038,
      "loss": 0.2619,
      "step": 3363
    },
    {
      "epoch": 1.2422451994091581,
      "grad_norm": 0.2414872944355011,
      "learning_rate": 0.00011725581968222688,
      "loss": 0.2155,
      "step": 3364
    },
    {
      "epoch": 1.2426144756277695,
      "grad_norm": 0.25442835688591003,
      "learning_rate": 0.0001172311861066634,
      "loss": 0.2248,
      "step": 3365
    },
    {
      "epoch": 1.242983751846381,
      "grad_norm": 0.8595656156539917,
      "learning_rate": 0.0001172065525310999,
      "loss": 0.2787,
      "step": 3366
    },
    {
      "epoch": 1.2433530280649927,
      "grad_norm": 0.3119123578071594,
      "learning_rate": 0.00011718191895553641,
      "loss": 0.2665,
      "step": 3367
    },
    {
      "epoch": 1.243722304283604,
      "grad_norm": 0.29014065861701965,
      "learning_rate": 0.0001171572853799729,
      "loss": 0.2235,
      "step": 3368
    },
    {
      "epoch": 1.2440915805022157,
      "grad_norm": 0.2537941336631775,
      "learning_rate": 0.00011713265180440943,
      "loss": 0.235,
      "step": 3369
    },
    {
      "epoch": 1.2444608567208273,
      "grad_norm": 0.29093918204307556,
      "learning_rate": 0.00011710801822884591,
      "loss": 0.2536,
      "step": 3370
    },
    {
      "epoch": 1.2448301329394387,
      "grad_norm": 0.2843869626522064,
      "learning_rate": 0.00011708338465328244,
      "loss": 0.2347,
      "step": 3371
    },
    {
      "epoch": 1.2451994091580503,
      "grad_norm": 0.4438280463218689,
      "learning_rate": 0.00011705875107771893,
      "loss": 0.2377,
      "step": 3372
    },
    {
      "epoch": 1.2455686853766617,
      "grad_norm": 0.2818486988544464,
      "learning_rate": 0.00011703411750215544,
      "loss": 0.2344,
      "step": 3373
    },
    {
      "epoch": 1.2459379615952733,
      "grad_norm": 0.29656335711479187,
      "learning_rate": 0.00011700948392659195,
      "loss": 0.2495,
      "step": 3374
    },
    {
      "epoch": 1.2463072378138849,
      "grad_norm": 0.2644748389720917,
      "learning_rate": 0.00011698485035102846,
      "loss": 0.2356,
      "step": 3375
    },
    {
      "epoch": 1.2466765140324962,
      "grad_norm": 0.32339850068092346,
      "learning_rate": 0.00011696021677546496,
      "loss": 0.2401,
      "step": 3376
    },
    {
      "epoch": 1.2470457902511078,
      "grad_norm": 0.27086853981018066,
      "learning_rate": 0.00011693558319990148,
      "loss": 0.2429,
      "step": 3377
    },
    {
      "epoch": 1.2474150664697194,
      "grad_norm": 0.2287064641714096,
      "learning_rate": 0.00011691094962433798,
      "loss": 0.2272,
      "step": 3378
    },
    {
      "epoch": 1.2477843426883308,
      "grad_norm": 0.2801342010498047,
      "learning_rate": 0.00011688631604877449,
      "loss": 0.2895,
      "step": 3379
    },
    {
      "epoch": 1.2481536189069424,
      "grad_norm": 0.34044507145881653,
      "learning_rate": 0.00011686168247321099,
      "loss": 0.3109,
      "step": 3380
    },
    {
      "epoch": 1.248522895125554,
      "grad_norm": 0.2989860773086548,
      "learning_rate": 0.00011683704889764751,
      "loss": 0.2594,
      "step": 3381
    },
    {
      "epoch": 1.2488921713441654,
      "grad_norm": 0.2658219635486603,
      "learning_rate": 0.000116812415322084,
      "loss": 0.2233,
      "step": 3382
    },
    {
      "epoch": 1.249261447562777,
      "grad_norm": 0.2827605903148651,
      "learning_rate": 0.00011678778174652052,
      "loss": 0.218,
      "step": 3383
    },
    {
      "epoch": 1.2496307237813884,
      "grad_norm": 0.3575083315372467,
      "learning_rate": 0.00011676314817095701,
      "loss": 0.3148,
      "step": 3384
    },
    {
      "epoch": 1.25,
      "grad_norm": 0.32070037722587585,
      "learning_rate": 0.00011673851459539354,
      "loss": 0.2505,
      "step": 3385
    },
    {
      "epoch": 1.2503692762186116,
      "grad_norm": 0.23181554675102234,
      "learning_rate": 0.00011671388101983003,
      "loss": 0.2229,
      "step": 3386
    },
    {
      "epoch": 1.250738552437223,
      "grad_norm": 0.20842178165912628,
      "learning_rate": 0.00011668924744426654,
      "loss": 0.201,
      "step": 3387
    },
    {
      "epoch": 1.2511078286558346,
      "grad_norm": 0.21866343915462494,
      "learning_rate": 0.00011666461386870304,
      "loss": 0.2098,
      "step": 3388
    },
    {
      "epoch": 1.251477104874446,
      "grad_norm": 0.3296528160572052,
      "learning_rate": 0.00011663998029313956,
      "loss": 0.2112,
      "step": 3389
    },
    {
      "epoch": 1.2518463810930576,
      "grad_norm": 0.26271867752075195,
      "learning_rate": 0.00011661534671757606,
      "loss": 0.261,
      "step": 3390
    },
    {
      "epoch": 1.2522156573116692,
      "grad_norm": 0.2829124629497528,
      "learning_rate": 0.00011659071314201257,
      "loss": 0.2296,
      "step": 3391
    },
    {
      "epoch": 1.2525849335302808,
      "grad_norm": 0.26187101006507874,
      "learning_rate": 0.00011656607956644907,
      "loss": 0.2213,
      "step": 3392
    },
    {
      "epoch": 1.2529542097488922,
      "grad_norm": 0.2645774781703949,
      "learning_rate": 0.00011654144599088559,
      "loss": 0.2434,
      "step": 3393
    },
    {
      "epoch": 1.2533234859675038,
      "grad_norm": 0.27006688714027405,
      "learning_rate": 0.00011651681241532209,
      "loss": 0.2364,
      "step": 3394
    },
    {
      "epoch": 1.2536927621861151,
      "grad_norm": 0.2682693898677826,
      "learning_rate": 0.0001164921788397586,
      "loss": 0.2313,
      "step": 3395
    },
    {
      "epoch": 1.2540620384047267,
      "grad_norm": 0.29124483466148376,
      "learning_rate": 0.0001164675452641951,
      "loss": 0.2309,
      "step": 3396
    },
    {
      "epoch": 1.2544313146233383,
      "grad_norm": 0.2844143509864807,
      "learning_rate": 0.00011644291168863162,
      "loss": 0.2489,
      "step": 3397
    },
    {
      "epoch": 1.2548005908419497,
      "grad_norm": 0.24740177392959595,
      "learning_rate": 0.00011641827811306811,
      "loss": 0.212,
      "step": 3398
    },
    {
      "epoch": 1.2551698670605613,
      "grad_norm": 0.2769998013973236,
      "learning_rate": 0.00011639364453750463,
      "loss": 0.2421,
      "step": 3399
    },
    {
      "epoch": 1.2555391432791727,
      "grad_norm": 0.27294492721557617,
      "learning_rate": 0.00011636901096194112,
      "loss": 0.2532,
      "step": 3400
    },
    {
      "epoch": 1.2555391432791727,
      "eval_loss": 0.2674335241317749,
      "eval_runtime": 5.8599,
      "eval_samples_per_second": 8.533,
      "eval_steps_per_second": 1.195,
      "step": 3400
    },
    {
      "epoch": 1.2559084194977843,
      "grad_norm": 0.3451029658317566,
      "learning_rate": 0.00011634437738637765,
      "loss": 0.2621,
      "step": 3401
    },
    {
      "epoch": 1.256277695716396,
      "grad_norm": 0.28013747930526733,
      "learning_rate": 0.00011631974381081414,
      "loss": 0.2448,
      "step": 3402
    },
    {
      "epoch": 1.2566469719350075,
      "grad_norm": 0.26190176606178284,
      "learning_rate": 0.00011629511023525065,
      "loss": 0.2237,
      "step": 3403
    },
    {
      "epoch": 1.257016248153619,
      "grad_norm": 0.30306148529052734,
      "learning_rate": 0.00011627047665968715,
      "loss": 0.252,
      "step": 3404
    },
    {
      "epoch": 1.2573855243722305,
      "grad_norm": 0.2617952823638916,
      "learning_rate": 0.00011624584308412367,
      "loss": 0.2137,
      "step": 3405
    },
    {
      "epoch": 1.2577548005908419,
      "grad_norm": 0.35550904273986816,
      "learning_rate": 0.00011622120950856017,
      "loss": 0.266,
      "step": 3406
    },
    {
      "epoch": 1.2581240768094535,
      "grad_norm": 0.3020575940608978,
      "learning_rate": 0.00011619657593299668,
      "loss": 0.2807,
      "step": 3407
    },
    {
      "epoch": 1.258493353028065,
      "grad_norm": 0.30012327432632446,
      "learning_rate": 0.00011617194235743319,
      "loss": 0.2647,
      "step": 3408
    },
    {
      "epoch": 1.2588626292466765,
      "grad_norm": 0.29071366786956787,
      "learning_rate": 0.0001161473087818697,
      "loss": 0.2536,
      "step": 3409
    },
    {
      "epoch": 1.259231905465288,
      "grad_norm": 0.28802093863487244,
      "learning_rate": 0.0001161226752063062,
      "loss": 0.2628,
      "step": 3410
    },
    {
      "epoch": 1.2596011816838995,
      "grad_norm": 0.253380686044693,
      "learning_rate": 0.00011609804163074272,
      "loss": 0.2493,
      "step": 3411
    },
    {
      "epoch": 1.259970457902511,
      "grad_norm": 0.27335187792778015,
      "learning_rate": 0.00011607340805517922,
      "loss": 0.2475,
      "step": 3412
    },
    {
      "epoch": 1.2603397341211227,
      "grad_norm": 0.24106822907924652,
      "learning_rate": 0.00011604877447961573,
      "loss": 0.1973,
      "step": 3413
    },
    {
      "epoch": 1.2607090103397343,
      "grad_norm": 0.2284409999847412,
      "learning_rate": 0.00011602414090405222,
      "loss": 0.203,
      "step": 3414
    },
    {
      "epoch": 1.2610782865583456,
      "grad_norm": 0.2706008851528168,
      "learning_rate": 0.00011599950732848875,
      "loss": 0.2147,
      "step": 3415
    },
    {
      "epoch": 1.2614475627769572,
      "grad_norm": 0.27175143361091614,
      "learning_rate": 0.00011597487375292523,
      "loss": 0.2403,
      "step": 3416
    },
    {
      "epoch": 1.2618168389955686,
      "grad_norm": 0.2630060613155365,
      "learning_rate": 0.00011595024017736176,
      "loss": 0.2426,
      "step": 3417
    },
    {
      "epoch": 1.2621861152141802,
      "grad_norm": 0.266940176486969,
      "learning_rate": 0.00011592560660179825,
      "loss": 0.242,
      "step": 3418
    },
    {
      "epoch": 1.2625553914327918,
      "grad_norm": 0.2598400115966797,
      "learning_rate": 0.00011590097302623477,
      "loss": 0.2826,
      "step": 3419
    },
    {
      "epoch": 1.2629246676514032,
      "grad_norm": 0.2574462294578552,
      "learning_rate": 0.00011587633945067127,
      "loss": 0.2622,
      "step": 3420
    },
    {
      "epoch": 1.2632939438700148,
      "grad_norm": 0.27017250657081604,
      "learning_rate": 0.00011585170587510778,
      "loss": 0.2358,
      "step": 3421
    },
    {
      "epoch": 1.2636632200886262,
      "grad_norm": 0.30165326595306396,
      "learning_rate": 0.00011582707229954428,
      "loss": 0.2862,
      "step": 3422
    },
    {
      "epoch": 1.2640324963072378,
      "grad_norm": 0.2234553098678589,
      "learning_rate": 0.0001158024387239808,
      "loss": 0.1956,
      "step": 3423
    },
    {
      "epoch": 1.2644017725258494,
      "grad_norm": 0.299065500497818,
      "learning_rate": 0.0001157778051484173,
      "loss": 0.2207,
      "step": 3424
    },
    {
      "epoch": 1.2647710487444608,
      "grad_norm": 0.23009859025478363,
      "learning_rate": 0.00011575317157285381,
      "loss": 0.2,
      "step": 3425
    },
    {
      "epoch": 1.2651403249630724,
      "grad_norm": 0.31464433670043945,
      "learning_rate": 0.00011572853799729031,
      "loss": 0.2199,
      "step": 3426
    },
    {
      "epoch": 1.2655096011816838,
      "grad_norm": 0.2601170837879181,
      "learning_rate": 0.00011570390442172683,
      "loss": 0.2452,
      "step": 3427
    },
    {
      "epoch": 1.2658788774002954,
      "grad_norm": 0.21769161522388458,
      "learning_rate": 0.00011567927084616333,
      "loss": 0.1744,
      "step": 3428
    },
    {
      "epoch": 1.266248153618907,
      "grad_norm": 0.30806398391723633,
      "learning_rate": 0.00011565463727059984,
      "loss": 0.2467,
      "step": 3429
    },
    {
      "epoch": 1.2666174298375186,
      "grad_norm": 0.21978482604026794,
      "learning_rate": 0.00011563000369503633,
      "loss": 0.2407,
      "step": 3430
    },
    {
      "epoch": 1.26698670605613,
      "grad_norm": 0.2523432970046997,
      "learning_rate": 0.00011560537011947286,
      "loss": 0.2333,
      "step": 3431
    },
    {
      "epoch": 1.2673559822747416,
      "grad_norm": 0.31474968791007996,
      "learning_rate": 0.00011558073654390935,
      "loss": 0.2391,
      "step": 3432
    },
    {
      "epoch": 1.267725258493353,
      "grad_norm": 0.2926175892353058,
      "learning_rate": 0.00011555610296834588,
      "loss": 0.2344,
      "step": 3433
    },
    {
      "epoch": 1.2680945347119645,
      "grad_norm": 0.25597435235977173,
      "learning_rate": 0.00011553146939278236,
      "loss": 0.1791,
      "step": 3434
    },
    {
      "epoch": 1.2684638109305761,
      "grad_norm": 0.2518375813961029,
      "learning_rate": 0.00011550683581721888,
      "loss": 0.2316,
      "step": 3435
    },
    {
      "epoch": 1.2688330871491875,
      "grad_norm": 0.303515762090683,
      "learning_rate": 0.00011548220224165538,
      "loss": 0.2911,
      "step": 3436
    },
    {
      "epoch": 1.2692023633677991,
      "grad_norm": 0.33104947209358215,
      "learning_rate": 0.00011545756866609189,
      "loss": 0.2776,
      "step": 3437
    },
    {
      "epoch": 1.2695716395864105,
      "grad_norm": 0.2640734612941742,
      "learning_rate": 0.0001154329350905284,
      "loss": 0.2471,
      "step": 3438
    },
    {
      "epoch": 1.269940915805022,
      "grad_norm": 0.23754489421844482,
      "learning_rate": 0.00011540830151496491,
      "loss": 0.2209,
      "step": 3439
    },
    {
      "epoch": 1.2703101920236337,
      "grad_norm": 0.25826865434646606,
      "learning_rate": 0.00011538366793940141,
      "loss": 0.2148,
      "step": 3440
    },
    {
      "epoch": 1.2706794682422453,
      "grad_norm": 0.2712554335594177,
      "learning_rate": 0.00011535903436383792,
      "loss": 0.2439,
      "step": 3441
    },
    {
      "epoch": 1.2710487444608567,
      "grad_norm": 0.25869423151016235,
      "learning_rate": 0.00011533440078827443,
      "loss": 0.2273,
      "step": 3442
    },
    {
      "epoch": 1.2714180206794683,
      "grad_norm": 0.29017847776412964,
      "learning_rate": 0.00011530976721271094,
      "loss": 0.2136,
      "step": 3443
    },
    {
      "epoch": 1.2717872968980797,
      "grad_norm": 0.26316726207733154,
      "learning_rate": 0.00011528513363714744,
      "loss": 0.232,
      "step": 3444
    },
    {
      "epoch": 1.2721565731166913,
      "grad_norm": 0.27002277970314026,
      "learning_rate": 0.00011526050006158396,
      "loss": 0.226,
      "step": 3445
    },
    {
      "epoch": 1.2725258493353029,
      "grad_norm": 0.3249281048774719,
      "learning_rate": 0.00011523586648602044,
      "loss": 0.2423,
      "step": 3446
    },
    {
      "epoch": 1.2728951255539143,
      "grad_norm": 0.33024588227272034,
      "learning_rate": 0.00011521123291045697,
      "loss": 0.2401,
      "step": 3447
    },
    {
      "epoch": 1.2732644017725259,
      "grad_norm": 0.2906222939491272,
      "learning_rate": 0.00011518659933489346,
      "loss": 0.2713,
      "step": 3448
    },
    {
      "epoch": 1.2736336779911372,
      "grad_norm": 0.4145973324775696,
      "learning_rate": 0.00011516196575932999,
      "loss": 0.2249,
      "step": 3449
    },
    {
      "epoch": 1.2740029542097489,
      "grad_norm": 0.24750135838985443,
      "learning_rate": 0.00011513733218376648,
      "loss": 0.2217,
      "step": 3450
    },
    {
      "epoch": 1.2740029542097489,
      "eval_loss": 0.27090415358543396,
      "eval_runtime": 5.8672,
      "eval_samples_per_second": 8.522,
      "eval_steps_per_second": 1.193,
      "step": 3450
    },
    {
      "epoch": 1.2743722304283605,
      "grad_norm": 0.31642940640449524,
      "learning_rate": 0.00011511269860820299,
      "loss": 0.2239,
      "step": 3451
    },
    {
      "epoch": 1.274741506646972,
      "grad_norm": 0.30429863929748535,
      "learning_rate": 0.00011508806503263949,
      "loss": 0.2396,
      "step": 3452
    },
    {
      "epoch": 1.2751107828655834,
      "grad_norm": 0.3115158677101135,
      "learning_rate": 0.000115063431457076,
      "loss": 0.3013,
      "step": 3453
    },
    {
      "epoch": 1.275480059084195,
      "grad_norm": 0.32751041650772095,
      "learning_rate": 0.0001150387978815125,
      "loss": 0.2545,
      "step": 3454
    },
    {
      "epoch": 1.2758493353028064,
      "grad_norm": 0.35201385617256165,
      "learning_rate": 0.00011501416430594902,
      "loss": 0.2371,
      "step": 3455
    },
    {
      "epoch": 1.276218611521418,
      "grad_norm": 0.3706410229206085,
      "learning_rate": 0.00011498953073038552,
      "loss": 0.2305,
      "step": 3456
    },
    {
      "epoch": 1.2765878877400296,
      "grad_norm": 0.28357771039009094,
      "learning_rate": 0.00011496489715482204,
      "loss": 0.2265,
      "step": 3457
    },
    {
      "epoch": 1.276957163958641,
      "grad_norm": 0.2744147479534149,
      "learning_rate": 0.00011494026357925854,
      "loss": 0.2528,
      "step": 3458
    },
    {
      "epoch": 1.2773264401772526,
      "grad_norm": 0.3463950455188751,
      "learning_rate": 0.00011491563000369505,
      "loss": 0.3019,
      "step": 3459
    },
    {
      "epoch": 1.277695716395864,
      "grad_norm": 0.30260658264160156,
      "learning_rate": 0.00011489099642813155,
      "loss": 0.2202,
      "step": 3460
    },
    {
      "epoch": 1.2780649926144756,
      "grad_norm": 0.27453920245170593,
      "learning_rate": 0.00011486636285256807,
      "loss": 0.2178,
      "step": 3461
    },
    {
      "epoch": 1.2784342688330872,
      "grad_norm": 0.27138856053352356,
      "learning_rate": 0.00011484172927700456,
      "loss": 0.2267,
      "step": 3462
    },
    {
      "epoch": 1.2788035450516988,
      "grad_norm": 0.30846890807151794,
      "learning_rate": 0.00011481709570144108,
      "loss": 0.2572,
      "step": 3463
    },
    {
      "epoch": 1.2791728212703102,
      "grad_norm": 0.30298444628715515,
      "learning_rate": 0.00011479246212587757,
      "loss": 0.26,
      "step": 3464
    },
    {
      "epoch": 1.2795420974889218,
      "grad_norm": 0.29792797565460205,
      "learning_rate": 0.0001147678285503141,
      "loss": 0.2344,
      "step": 3465
    },
    {
      "epoch": 1.2799113737075332,
      "grad_norm": 0.3351114094257355,
      "learning_rate": 0.00011474319497475059,
      "loss": 0.2508,
      "step": 3466
    },
    {
      "epoch": 1.2802806499261448,
      "grad_norm": 0.4213079512119293,
      "learning_rate": 0.0001147185613991871,
      "loss": 0.2774,
      "step": 3467
    },
    {
      "epoch": 1.2806499261447564,
      "grad_norm": 0.4092477858066559,
      "learning_rate": 0.0001146939278236236,
      "loss": 0.3138,
      "step": 3468
    },
    {
      "epoch": 1.2810192023633677,
      "grad_norm": 0.28857889771461487,
      "learning_rate": 0.00011466929424806012,
      "loss": 0.2354,
      "step": 3469
    },
    {
      "epoch": 1.2813884785819794,
      "grad_norm": 0.29781675338745117,
      "learning_rate": 0.00011464466067249662,
      "loss": 0.2168,
      "step": 3470
    },
    {
      "epoch": 1.2817577548005907,
      "grad_norm": 0.2629866302013397,
      "learning_rate": 0.00011462002709693313,
      "loss": 0.201,
      "step": 3471
    },
    {
      "epoch": 1.2821270310192023,
      "grad_norm": 0.2955879271030426,
      "learning_rate": 0.00011459539352136963,
      "loss": 0.2853,
      "step": 3472
    },
    {
      "epoch": 1.282496307237814,
      "grad_norm": 0.3258877694606781,
      "learning_rate": 0.00011457075994580615,
      "loss": 0.2475,
      "step": 3473
    },
    {
      "epoch": 1.2828655834564255,
      "grad_norm": 0.28191569447517395,
      "learning_rate": 0.00011454612637024265,
      "loss": 0.2815,
      "step": 3474
    },
    {
      "epoch": 1.283234859675037,
      "grad_norm": 0.27228131890296936,
      "learning_rate": 0.00011452149279467916,
      "loss": 0.2305,
      "step": 3475
    },
    {
      "epoch": 1.2836041358936485,
      "grad_norm": 0.3138941526412964,
      "learning_rate": 0.00011449685921911567,
      "loss": 0.2513,
      "step": 3476
    },
    {
      "epoch": 1.28397341211226,
      "grad_norm": 0.2670232653617859,
      "learning_rate": 0.00011447222564355218,
      "loss": 0.2123,
      "step": 3477
    },
    {
      "epoch": 1.2843426883308715,
      "grad_norm": 0.2697356343269348,
      "learning_rate": 0.00011444759206798867,
      "loss": 0.212,
      "step": 3478
    },
    {
      "epoch": 1.284711964549483,
      "grad_norm": 0.26701635122299194,
      "learning_rate": 0.0001144229584924252,
      "loss": 0.2375,
      "step": 3479
    },
    {
      "epoch": 1.2850812407680945,
      "grad_norm": 0.25487884879112244,
      "learning_rate": 0.00011439832491686168,
      "loss": 0.2339,
      "step": 3480
    },
    {
      "epoch": 1.285450516986706,
      "grad_norm": 0.22980691492557526,
      "learning_rate": 0.00011437369134129821,
      "loss": 0.2139,
      "step": 3481
    },
    {
      "epoch": 1.2858197932053175,
      "grad_norm": 0.26740097999572754,
      "learning_rate": 0.0001143490577657347,
      "loss": 0.2707,
      "step": 3482
    },
    {
      "epoch": 1.286189069423929,
      "grad_norm": 0.2384112924337387,
      "learning_rate": 0.00011432442419017121,
      "loss": 0.222,
      "step": 3483
    },
    {
      "epoch": 1.2865583456425407,
      "grad_norm": 0.272722452878952,
      "learning_rate": 0.00011429979061460772,
      "loss": 0.2641,
      "step": 3484
    },
    {
      "epoch": 1.286927621861152,
      "grad_norm": 0.24369671940803528,
      "learning_rate": 0.00011427515703904422,
      "loss": 0.2633,
      "step": 3485
    },
    {
      "epoch": 1.2872968980797637,
      "grad_norm": 0.26729193329811096,
      "learning_rate": 0.00011425052346348073,
      "loss": 0.2526,
      "step": 3486
    },
    {
      "epoch": 1.2876661742983753,
      "grad_norm": 0.2396964579820633,
      "learning_rate": 0.00011422588988791723,
      "loss": 0.2016,
      "step": 3487
    },
    {
      "epoch": 1.2880354505169866,
      "grad_norm": 0.26808351278305054,
      "learning_rate": 0.00011420125631235375,
      "loss": 0.2454,
      "step": 3488
    },
    {
      "epoch": 1.2884047267355982,
      "grad_norm": 0.3159623146057129,
      "learning_rate": 0.00011417662273679023,
      "loss": 0.256,
      "step": 3489
    },
    {
      "epoch": 1.2887740029542099,
      "grad_norm": 0.24514681100845337,
      "learning_rate": 0.00011415198916122676,
      "loss": 0.2292,
      "step": 3490
    },
    {
      "epoch": 1.2891432791728212,
      "grad_norm": 0.301023006439209,
      "learning_rate": 0.00011412735558566325,
      "loss": 0.2833,
      "step": 3491
    },
    {
      "epoch": 1.2895125553914328,
      "grad_norm": 0.24914702773094177,
      "learning_rate": 0.00011410272201009978,
      "loss": 0.2005,
      "step": 3492
    },
    {
      "epoch": 1.2898818316100442,
      "grad_norm": 0.2552284896373749,
      "learning_rate": 0.00011407808843453627,
      "loss": 0.2318,
      "step": 3493
    },
    {
      "epoch": 1.2902511078286558,
      "grad_norm": 0.3317311108112335,
      "learning_rate": 0.00011405345485897278,
      "loss": 0.2868,
      "step": 3494
    },
    {
      "epoch": 1.2906203840472674,
      "grad_norm": 0.2974274158477783,
      "learning_rate": 0.00011402882128340928,
      "loss": 0.2212,
      "step": 3495
    },
    {
      "epoch": 1.2909896602658788,
      "grad_norm": 0.23321019113063812,
      "learning_rate": 0.0001140041877078458,
      "loss": 0.2282,
      "step": 3496
    },
    {
      "epoch": 1.2913589364844904,
      "grad_norm": 0.26129546761512756,
      "learning_rate": 0.0001139795541322823,
      "loss": 0.21,
      "step": 3497
    },
    {
      "epoch": 1.2917282127031018,
      "grad_norm": 0.32607483863830566,
      "learning_rate": 0.00011395492055671881,
      "loss": 0.2675,
      "step": 3498
    },
    {
      "epoch": 1.2920974889217134,
      "grad_norm": 0.24857379496097565,
      "learning_rate": 0.00011393028698115531,
      "loss": 0.1942,
      "step": 3499
    },
    {
      "epoch": 1.292466765140325,
      "grad_norm": 0.23598088324069977,
      "learning_rate": 0.00011390565340559183,
      "loss": 0.2058,
      "step": 3500
    },
    {
      "epoch": 1.292466765140325,
      "eval_loss": 0.27111950516700745,
      "eval_runtime": 5.8744,
      "eval_samples_per_second": 8.511,
      "eval_steps_per_second": 1.192,
      "step": 3500
    },
    {
      "epoch": 1.2928360413589366,
      "grad_norm": 0.26858481764793396,
      "learning_rate": 0.00011388101983002833,
      "loss": 0.2404,
      "step": 3501
    },
    {
      "epoch": 1.293205317577548,
      "grad_norm": 0.29856938123703003,
      "learning_rate": 0.00011385638625446484,
      "loss": 0.2611,
      "step": 3502
    },
    {
      "epoch": 1.2935745937961596,
      "grad_norm": 0.23774632811546326,
      "learning_rate": 0.00011383175267890134,
      "loss": 0.2067,
      "step": 3503
    },
    {
      "epoch": 1.293943870014771,
      "grad_norm": 0.31560781598091125,
      "learning_rate": 0.00011380711910333786,
      "loss": 0.2584,
      "step": 3504
    },
    {
      "epoch": 1.2943131462333826,
      "grad_norm": 0.29972249269485474,
      "learning_rate": 0.00011378248552777435,
      "loss": 0.2509,
      "step": 3505
    },
    {
      "epoch": 1.2946824224519942,
      "grad_norm": 0.5040701031684875,
      "learning_rate": 0.00011375785195221087,
      "loss": 0.2352,
      "step": 3506
    },
    {
      "epoch": 1.2950516986706055,
      "grad_norm": 0.2971581518650055,
      "learning_rate": 0.00011373321837664736,
      "loss": 0.2743,
      "step": 3507
    },
    {
      "epoch": 1.2954209748892171,
      "grad_norm": 0.3349955081939697,
      "learning_rate": 0.00011370858480108389,
      "loss": 0.2851,
      "step": 3508
    },
    {
      "epoch": 1.2957902511078285,
      "grad_norm": 0.2929718494415283,
      "learning_rate": 0.00011368395122552038,
      "loss": 0.2723,
      "step": 3509
    },
    {
      "epoch": 1.2961595273264401,
      "grad_norm": 0.31535041332244873,
      "learning_rate": 0.00011365931764995689,
      "loss": 0.2429,
      "step": 3510
    },
    {
      "epoch": 1.2965288035450517,
      "grad_norm": 0.328657865524292,
      "learning_rate": 0.0001136346840743934,
      "loss": 0.2653,
      "step": 3511
    },
    {
      "epoch": 1.2968980797636633,
      "grad_norm": 0.29910996556282043,
      "learning_rate": 0.00011361005049882991,
      "loss": 0.2304,
      "step": 3512
    },
    {
      "epoch": 1.2972673559822747,
      "grad_norm": 0.28474050760269165,
      "learning_rate": 0.00011358541692326641,
      "loss": 0.2538,
      "step": 3513
    },
    {
      "epoch": 1.2976366322008863,
      "grad_norm": 0.3016922175884247,
      "learning_rate": 0.00011356078334770292,
      "loss": 0.2498,
      "step": 3514
    },
    {
      "epoch": 1.2980059084194977,
      "grad_norm": 0.2724801301956177,
      "learning_rate": 0.00011353614977213943,
      "loss": 0.2384,
      "step": 3515
    },
    {
      "epoch": 1.2983751846381093,
      "grad_norm": 0.23370754718780518,
      "learning_rate": 0.00011351151619657594,
      "loss": 0.2254,
      "step": 3516
    },
    {
      "epoch": 1.298744460856721,
      "grad_norm": 0.2495436668395996,
      "learning_rate": 0.00011348688262101244,
      "loss": 0.2422,
      "step": 3517
    },
    {
      "epoch": 1.2991137370753323,
      "grad_norm": 0.2952888309955597,
      "learning_rate": 0.00011346224904544896,
      "loss": 0.2396,
      "step": 3518
    },
    {
      "epoch": 1.299483013293944,
      "grad_norm": 0.2261483073234558,
      "learning_rate": 0.00011343761546988546,
      "loss": 0.2039,
      "step": 3519
    },
    {
      "epoch": 1.2998522895125553,
      "grad_norm": 0.2913593351840973,
      "learning_rate": 0.00011341298189432197,
      "loss": 0.2595,
      "step": 3520
    },
    {
      "epoch": 1.3002215657311669,
      "grad_norm": 0.32711273431777954,
      "learning_rate": 0.00011338834831875846,
      "loss": 0.2758,
      "step": 3521
    },
    {
      "epoch": 1.3005908419497785,
      "grad_norm": 0.40282630920410156,
      "learning_rate": 0.00011336371474319499,
      "loss": 0.3096,
      "step": 3522
    },
    {
      "epoch": 1.30096011816839,
      "grad_norm": 0.26645639538764954,
      "learning_rate": 0.00011333908116763147,
      "loss": 0.2112,
      "step": 3523
    },
    {
      "epoch": 1.3013293943870015,
      "grad_norm": 0.34445178508758545,
      "learning_rate": 0.000113314447592068,
      "loss": 0.2779,
      "step": 3524
    },
    {
      "epoch": 1.301698670605613,
      "grad_norm": 0.2424442619085312,
      "learning_rate": 0.00011328981401650449,
      "loss": 0.2142,
      "step": 3525
    },
    {
      "epoch": 1.3020679468242244,
      "grad_norm": 0.27171117067337036,
      "learning_rate": 0.000113265180440941,
      "loss": 0.2487,
      "step": 3526
    },
    {
      "epoch": 1.302437223042836,
      "grad_norm": 0.2760293185710907,
      "learning_rate": 0.0001132405468653775,
      "loss": 0.2428,
      "step": 3527
    },
    {
      "epoch": 1.3028064992614476,
      "grad_norm": 0.2675725221633911,
      "learning_rate": 0.00011321591328981402,
      "loss": 0.2144,
      "step": 3528
    },
    {
      "epoch": 1.303175775480059,
      "grad_norm": 0.25027212500572205,
      "learning_rate": 0.00011319127971425052,
      "loss": 0.2479,
      "step": 3529
    },
    {
      "epoch": 1.3035450516986706,
      "grad_norm": 0.3069452941417694,
      "learning_rate": 0.00011316664613868704,
      "loss": 0.2759,
      "step": 3530
    },
    {
      "epoch": 1.303914327917282,
      "grad_norm": 0.2560293972492218,
      "learning_rate": 0.00011314201256312354,
      "loss": 0.2396,
      "step": 3531
    },
    {
      "epoch": 1.3042836041358936,
      "grad_norm": 0.24829941987991333,
      "learning_rate": 0.00011311737898756005,
      "loss": 0.2301,
      "step": 3532
    },
    {
      "epoch": 1.3046528803545052,
      "grad_norm": 0.21412433683872223,
      "learning_rate": 0.00011309274541199655,
      "loss": 0.192,
      "step": 3533
    },
    {
      "epoch": 1.3050221565731168,
      "grad_norm": 0.24372343719005585,
      "learning_rate": 0.00011306811183643307,
      "loss": 0.195,
      "step": 3534
    },
    {
      "epoch": 1.3053914327917282,
      "grad_norm": 0.32921963930130005,
      "learning_rate": 0.00011304347826086956,
      "loss": 0.2878,
      "step": 3535
    },
    {
      "epoch": 1.3057607090103398,
      "grad_norm": 0.3481973111629486,
      "learning_rate": 0.00011301884468530608,
      "loss": 0.2343,
      "step": 3536
    },
    {
      "epoch": 1.3061299852289512,
      "grad_norm": 0.31481435894966125,
      "learning_rate": 0.00011299421110974257,
      "loss": 0.3065,
      "step": 3537
    },
    {
      "epoch": 1.3064992614475628,
      "grad_norm": 0.24863918125629425,
      "learning_rate": 0.0001129695775341791,
      "loss": 0.2344,
      "step": 3538
    },
    {
      "epoch": 1.3068685376661744,
      "grad_norm": 0.30417582392692566,
      "learning_rate": 0.00011294494395861559,
      "loss": 0.2305,
      "step": 3539
    },
    {
      "epoch": 1.3072378138847858,
      "grad_norm": 0.2873446047306061,
      "learning_rate": 0.0001129203103830521,
      "loss": 0.2626,
      "step": 3540
    },
    {
      "epoch": 1.3076070901033974,
      "grad_norm": 0.2590044140815735,
      "learning_rate": 0.0001128956768074886,
      "loss": 0.2163,
      "step": 3541
    },
    {
      "epoch": 1.3079763663220088,
      "grad_norm": 0.23338545858860016,
      "learning_rate": 0.00011287104323192512,
      "loss": 0.2346,
      "step": 3542
    },
    {
      "epoch": 1.3083456425406204,
      "grad_norm": 0.2515639662742615,
      "learning_rate": 0.00011284640965636162,
      "loss": 0.2606,
      "step": 3543
    },
    {
      "epoch": 1.308714918759232,
      "grad_norm": 0.21571849286556244,
      "learning_rate": 0.00011282177608079813,
      "loss": 0.1929,
      "step": 3544
    },
    {
      "epoch": 1.3090841949778436,
      "grad_norm": 0.3184591829776764,
      "learning_rate": 0.00011279714250523463,
      "loss": 0.2396,
      "step": 3545
    },
    {
      "epoch": 1.309453471196455,
      "grad_norm": 0.22089743614196777,
      "learning_rate": 0.00011277250892967115,
      "loss": 0.1959,
      "step": 3546
    },
    {
      "epoch": 1.3098227474150665,
      "grad_norm": 0.23892144858837128,
      "learning_rate": 0.00011274787535410765,
      "loss": 0.2171,
      "step": 3547
    },
    {
      "epoch": 1.310192023633678,
      "grad_norm": 0.2656129002571106,
      "learning_rate": 0.00011272324177854416,
      "loss": 0.2251,
      "step": 3548
    },
    {
      "epoch": 1.3105612998522895,
      "grad_norm": 0.2840602993965149,
      "learning_rate": 0.00011269860820298067,
      "loss": 0.239,
      "step": 3549
    },
    {
      "epoch": 1.3109305760709011,
      "grad_norm": 0.25248202681541443,
      "learning_rate": 0.00011267397462741718,
      "loss": 0.2379,
      "step": 3550
    },
    {
      "epoch": 1.3109305760709011,
      "eval_loss": 0.27254676818847656,
      "eval_runtime": 5.8791,
      "eval_samples_per_second": 8.505,
      "eval_steps_per_second": 1.191,
      "step": 3550
    },
    {
      "epoch": 1.3112998522895125,
      "grad_norm": 0.23947429656982422,
      "learning_rate": 0.00011264934105185367,
      "loss": 0.2408,
      "step": 3551
    },
    {
      "epoch": 1.3116691285081241,
      "grad_norm": 0.2857097387313843,
      "learning_rate": 0.0001126247074762902,
      "loss": 0.2228,
      "step": 3552
    },
    {
      "epoch": 1.3120384047267355,
      "grad_norm": 0.25149619579315186,
      "learning_rate": 0.00011260007390072668,
      "loss": 0.2475,
      "step": 3553
    },
    {
      "epoch": 1.312407680945347,
      "grad_norm": 0.276044636964798,
      "learning_rate": 0.00011257544032516321,
      "loss": 0.2294,
      "step": 3554
    },
    {
      "epoch": 1.3127769571639587,
      "grad_norm": 0.2988446354866028,
      "learning_rate": 0.0001125508067495997,
      "loss": 0.2923,
      "step": 3555
    },
    {
      "epoch": 1.31314623338257,
      "grad_norm": 0.2972252666950226,
      "learning_rate": 0.00011252617317403621,
      "loss": 0.2797,
      "step": 3556
    },
    {
      "epoch": 1.3135155096011817,
      "grad_norm": 0.2943788170814514,
      "learning_rate": 0.00011250153959847271,
      "loss": 0.2874,
      "step": 3557
    },
    {
      "epoch": 1.3138847858197933,
      "grad_norm": 0.29646357893943787,
      "learning_rate": 0.00011247690602290923,
      "loss": 0.2625,
      "step": 3558
    },
    {
      "epoch": 1.3142540620384047,
      "grad_norm": 0.3290356397628784,
      "learning_rate": 0.00011245227244734573,
      "loss": 0.2276,
      "step": 3559
    },
    {
      "epoch": 1.3146233382570163,
      "grad_norm": 0.32188060879707336,
      "learning_rate": 0.00011242763887178225,
      "loss": 0.3005,
      "step": 3560
    },
    {
      "epoch": 1.3149926144756279,
      "grad_norm": 0.28455227613449097,
      "learning_rate": 0.00011240300529621875,
      "loss": 0.2535,
      "step": 3561
    },
    {
      "epoch": 1.3153618906942393,
      "grad_norm": 0.2558704614639282,
      "learning_rate": 0.00011237837172065526,
      "loss": 0.2569,
      "step": 3562
    },
    {
      "epoch": 1.3157311669128509,
      "grad_norm": 0.3473958671092987,
      "learning_rate": 0.00011235373814509176,
      "loss": 0.2938,
      "step": 3563
    },
    {
      "epoch": 1.3161004431314622,
      "grad_norm": 0.28356698155403137,
      "learning_rate": 0.00011232910456952828,
      "loss": 0.2519,
      "step": 3564
    },
    {
      "epoch": 1.3164697193500738,
      "grad_norm": 0.2912018895149231,
      "learning_rate": 0.00011230447099396478,
      "loss": 0.2517,
      "step": 3565
    },
    {
      "epoch": 1.3168389955686854,
      "grad_norm": 0.28475067019462585,
      "learning_rate": 0.00011227983741840129,
      "loss": 0.2061,
      "step": 3566
    },
    {
      "epoch": 1.3172082717872968,
      "grad_norm": 0.27545440196990967,
      "learning_rate": 0.00011225520384283778,
      "loss": 0.244,
      "step": 3567
    },
    {
      "epoch": 1.3175775480059084,
      "grad_norm": 0.250146746635437,
      "learning_rate": 0.00011223057026727431,
      "loss": 0.2372,
      "step": 3568
    },
    {
      "epoch": 1.3179468242245198,
      "grad_norm": 0.30133166909217834,
      "learning_rate": 0.0001122059366917108,
      "loss": 0.2763,
      "step": 3569
    },
    {
      "epoch": 1.3183161004431314,
      "grad_norm": 0.29099568724632263,
      "learning_rate": 0.00011218130311614732,
      "loss": 0.2607,
      "step": 3570
    },
    {
      "epoch": 1.318685376661743,
      "grad_norm": 0.25981763005256653,
      "learning_rate": 0.00011215666954058381,
      "loss": 0.2147,
      "step": 3571
    },
    {
      "epoch": 1.3190546528803546,
      "grad_norm": 0.2595869302749634,
      "learning_rate": 0.00011213203596502033,
      "loss": 0.2371,
      "step": 3572
    },
    {
      "epoch": 1.319423929098966,
      "grad_norm": 0.3137706518173218,
      "learning_rate": 0.00011210740238945683,
      "loss": 0.2162,
      "step": 3573
    },
    {
      "epoch": 1.3197932053175776,
      "grad_norm": 0.28788837790489197,
      "learning_rate": 0.00011208276881389334,
      "loss": 0.2508,
      "step": 3574
    },
    {
      "epoch": 1.320162481536189,
      "grad_norm": 0.29614338278770447,
      "learning_rate": 0.00011205813523832984,
      "loss": 0.2548,
      "step": 3575
    },
    {
      "epoch": 1.3205317577548006,
      "grad_norm": 0.3168517053127289,
      "learning_rate": 0.00011203350166276636,
      "loss": 0.2657,
      "step": 3576
    },
    {
      "epoch": 1.3209010339734122,
      "grad_norm": 0.250918447971344,
      "learning_rate": 0.00011200886808720286,
      "loss": 0.2235,
      "step": 3577
    },
    {
      "epoch": 1.3212703101920236,
      "grad_norm": 0.23113346099853516,
      "learning_rate": 0.00011198423451163937,
      "loss": 0.2034,
      "step": 3578
    },
    {
      "epoch": 1.3216395864106352,
      "grad_norm": 0.25568103790283203,
      "learning_rate": 0.00011195960093607587,
      "loss": 0.2168,
      "step": 3579
    },
    {
      "epoch": 1.3220088626292466,
      "grad_norm": 0.3186097741127014,
      "learning_rate": 0.00011193496736051239,
      "loss": 0.2325,
      "step": 3580
    },
    {
      "epoch": 1.3223781388478582,
      "grad_norm": 0.2718065679073334,
      "learning_rate": 0.00011191033378494889,
      "loss": 0.22,
      "step": 3581
    },
    {
      "epoch": 1.3227474150664698,
      "grad_norm": 0.23766477406024933,
      "learning_rate": 0.0001118857002093854,
      "loss": 0.2225,
      "step": 3582
    },
    {
      "epoch": 1.3231166912850814,
      "grad_norm": 0.256465345621109,
      "learning_rate": 0.00011186106663382189,
      "loss": 0.2044,
      "step": 3583
    },
    {
      "epoch": 1.3234859675036927,
      "grad_norm": 0.2958122491836548,
      "learning_rate": 0.00011183643305825842,
      "loss": 0.2402,
      "step": 3584
    },
    {
      "epoch": 1.3238552437223043,
      "grad_norm": 0.3272998034954071,
      "learning_rate": 0.00011181179948269491,
      "loss": 0.2863,
      "step": 3585
    },
    {
      "epoch": 1.3242245199409157,
      "grad_norm": 0.31955695152282715,
      "learning_rate": 0.00011178716590713144,
      "loss": 0.268,
      "step": 3586
    },
    {
      "epoch": 1.3245937961595273,
      "grad_norm": 0.26362356543540955,
      "learning_rate": 0.00011176253233156792,
      "loss": 0.201,
      "step": 3587
    },
    {
      "epoch": 1.324963072378139,
      "grad_norm": 0.23116347193717957,
      "learning_rate": 0.00011173789875600444,
      "loss": 0.2013,
      "step": 3588
    },
    {
      "epoch": 1.3253323485967503,
      "grad_norm": 0.3511759638786316,
      "learning_rate": 0.00011171326518044094,
      "loss": 0.3629,
      "step": 3589
    },
    {
      "epoch": 1.325701624815362,
      "grad_norm": 0.28257256746292114,
      "learning_rate": 0.00011168863160487745,
      "loss": 0.2534,
      "step": 3590
    },
    {
      "epoch": 1.3260709010339733,
      "grad_norm": 0.29674193263053894,
      "learning_rate": 0.00011166399802931396,
      "loss": 0.231,
      "step": 3591
    },
    {
      "epoch": 1.326440177252585,
      "grad_norm": 0.33192455768585205,
      "learning_rate": 0.00011163936445375047,
      "loss": 0.2413,
      "step": 3592
    },
    {
      "epoch": 1.3268094534711965,
      "grad_norm": 0.24622425436973572,
      "learning_rate": 0.00011161473087818697,
      "loss": 0.2075,
      "step": 3593
    },
    {
      "epoch": 1.327178729689808,
      "grad_norm": 0.2626505196094513,
      "learning_rate": 0.00011159009730262349,
      "loss": 0.2626,
      "step": 3594
    },
    {
      "epoch": 1.3275480059084195,
      "grad_norm": 0.25430116057395935,
      "learning_rate": 0.00011156546372705999,
      "loss": 0.1966,
      "step": 3595
    },
    {
      "epoch": 1.327917282127031,
      "grad_norm": 0.25840574502944946,
      "learning_rate": 0.0001115408301514965,
      "loss": 0.245,
      "step": 3596
    },
    {
      "epoch": 1.3282865583456425,
      "grad_norm": 0.2602691650390625,
      "learning_rate": 0.000111516196575933,
      "loss": 0.2162,
      "step": 3597
    },
    {
      "epoch": 1.328655834564254,
      "grad_norm": 0.3222030699253082,
      "learning_rate": 0.00011149156300036952,
      "loss": 0.2458,
      "step": 3598
    },
    {
      "epoch": 1.3290251107828657,
      "grad_norm": 0.3286999762058258,
      "learning_rate": 0.000111466929424806,
      "loss": 0.253,
      "step": 3599
    },
    {
      "epoch": 1.329394387001477,
      "grad_norm": 0.2511175274848938,
      "learning_rate": 0.00011144229584924253,
      "loss": 0.2356,
      "step": 3600
    },
    {
      "epoch": 1.329394387001477,
      "eval_loss": 0.26840391755104065,
      "eval_runtime": 5.8604,
      "eval_samples_per_second": 8.532,
      "eval_steps_per_second": 1.194,
      "step": 3600
    },
    {
      "epoch": 1.3297636632200887,
      "grad_norm": 0.2840924561023712,
      "learning_rate": 0.00011141766227367902,
      "loss": 0.2471,
      "step": 3601
    },
    {
      "epoch": 1.3301329394387,
      "grad_norm": 0.2691866159439087,
      "learning_rate": 0.00011139302869811555,
      "loss": 0.218,
      "step": 3602
    },
    {
      "epoch": 1.3305022156573116,
      "grad_norm": 0.25245973467826843,
      "learning_rate": 0.00011136839512255204,
      "loss": 0.2343,
      "step": 3603
    },
    {
      "epoch": 1.3308714918759232,
      "grad_norm": 0.3062613904476166,
      "learning_rate": 0.00011134376154698855,
      "loss": 0.2687,
      "step": 3604
    },
    {
      "epoch": 1.3312407680945348,
      "grad_norm": 0.2301330268383026,
      "learning_rate": 0.00011131912797142505,
      "loss": 0.2069,
      "step": 3605
    },
    {
      "epoch": 1.3316100443131462,
      "grad_norm": 0.22849799692630768,
      "learning_rate": 0.00011129449439586157,
      "loss": 0.2116,
      "step": 3606
    },
    {
      "epoch": 1.3319793205317578,
      "grad_norm": 0.32399725914001465,
      "learning_rate": 0.00011126986082029807,
      "loss": 0.3216,
      "step": 3607
    },
    {
      "epoch": 1.3323485967503692,
      "grad_norm": 0.3028990924358368,
      "learning_rate": 0.00011124522724473458,
      "loss": 0.2015,
      "step": 3608
    },
    {
      "epoch": 1.3327178729689808,
      "grad_norm": 0.25538018345832825,
      "learning_rate": 0.00011122059366917108,
      "loss": 0.234,
      "step": 3609
    },
    {
      "epoch": 1.3330871491875924,
      "grad_norm": 0.27684277296066284,
      "learning_rate": 0.0001111959600936076,
      "loss": 0.2358,
      "step": 3610
    },
    {
      "epoch": 1.3334564254062038,
      "grad_norm": 0.297973096370697,
      "learning_rate": 0.0001111713265180441,
      "loss": 0.2258,
      "step": 3611
    },
    {
      "epoch": 1.3338257016248154,
      "grad_norm": 0.25658944249153137,
      "learning_rate": 0.00011114669294248061,
      "loss": 0.2267,
      "step": 3612
    },
    {
      "epoch": 1.3341949778434268,
      "grad_norm": 0.27613380551338196,
      "learning_rate": 0.00011112205936691711,
      "loss": 0.2106,
      "step": 3613
    },
    {
      "epoch": 1.3345642540620384,
      "grad_norm": 0.3037564754486084,
      "learning_rate": 0.00011109742579135363,
      "loss": 0.2435,
      "step": 3614
    },
    {
      "epoch": 1.33493353028065,
      "grad_norm": 0.2635299265384674,
      "learning_rate": 0.00011107279221579012,
      "loss": 0.2252,
      "step": 3615
    },
    {
      "epoch": 1.3353028064992616,
      "grad_norm": 0.24047908186912537,
      "learning_rate": 0.00011104815864022664,
      "loss": 0.182,
      "step": 3616
    },
    {
      "epoch": 1.335672082717873,
      "grad_norm": 0.22222352027893066,
      "learning_rate": 0.00011102352506466313,
      "loss": 0.2407,
      "step": 3617
    },
    {
      "epoch": 1.3360413589364846,
      "grad_norm": 0.33356255292892456,
      "learning_rate": 0.00011099889148909966,
      "loss": 0.28,
      "step": 3618
    },
    {
      "epoch": 1.336410635155096,
      "grad_norm": 0.3413843810558319,
      "learning_rate": 0.00011097425791353615,
      "loss": 0.2963,
      "step": 3619
    },
    {
      "epoch": 1.3367799113737076,
      "grad_norm": 0.3379725217819214,
      "learning_rate": 0.00011094962433797266,
      "loss": 0.2675,
      "step": 3620
    },
    {
      "epoch": 1.3371491875923192,
      "grad_norm": 0.2826775908470154,
      "learning_rate": 0.00011092499076240916,
      "loss": 0.2481,
      "step": 3621
    },
    {
      "epoch": 1.3375184638109305,
      "grad_norm": 0.28579071164131165,
      "learning_rate": 0.00011090035718684568,
      "loss": 0.2113,
      "step": 3622
    },
    {
      "epoch": 1.3378877400295421,
      "grad_norm": 0.29486680030822754,
      "learning_rate": 0.00011087572361128218,
      "loss": 0.2591,
      "step": 3623
    },
    {
      "epoch": 1.3382570162481535,
      "grad_norm": 0.32082071900367737,
      "learning_rate": 0.0001108510900357187,
      "loss": 0.2274,
      "step": 3624
    },
    {
      "epoch": 1.3386262924667651,
      "grad_norm": 0.3154670000076294,
      "learning_rate": 0.0001108264564601552,
      "loss": 0.2413,
      "step": 3625
    },
    {
      "epoch": 1.3389955686853767,
      "grad_norm": 0.36913928389549255,
      "learning_rate": 0.00011080182288459171,
      "loss": 0.2319,
      "step": 3626
    },
    {
      "epoch": 1.339364844903988,
      "grad_norm": 0.3428744375705719,
      "learning_rate": 0.00011077718930902821,
      "loss": 0.2275,
      "step": 3627
    },
    {
      "epoch": 1.3397341211225997,
      "grad_norm": 0.3373352289199829,
      "learning_rate": 0.00011075255573346473,
      "loss": 0.2676,
      "step": 3628
    },
    {
      "epoch": 1.340103397341211,
      "grad_norm": 0.27837231755256653,
      "learning_rate": 0.00011072792215790123,
      "loss": 0.2508,
      "step": 3629
    },
    {
      "epoch": 1.3404726735598227,
      "grad_norm": 0.25249144434928894,
      "learning_rate": 0.00011070328858233774,
      "loss": 0.2428,
      "step": 3630
    },
    {
      "epoch": 1.3408419497784343,
      "grad_norm": 0.32394635677337646,
      "learning_rate": 0.00011067865500677423,
      "loss": 0.2871,
      "step": 3631
    },
    {
      "epoch": 1.341211225997046,
      "grad_norm": 0.27134162187576294,
      "learning_rate": 0.00011065402143121076,
      "loss": 0.2512,
      "step": 3632
    },
    {
      "epoch": 1.3415805022156573,
      "grad_norm": 0.2610749304294586,
      "learning_rate": 0.00011062938785564724,
      "loss": 0.1917,
      "step": 3633
    },
    {
      "epoch": 1.3419497784342689,
      "grad_norm": 0.30713263154029846,
      "learning_rate": 0.00011060475428008377,
      "loss": 0.258,
      "step": 3634
    },
    {
      "epoch": 1.3423190546528803,
      "grad_norm": 0.24225565791130066,
      "learning_rate": 0.00011058012070452026,
      "loss": 0.214,
      "step": 3635
    },
    {
      "epoch": 1.3426883308714919,
      "grad_norm": 0.261345237493515,
      "learning_rate": 0.00011055548712895677,
      "loss": 0.2562,
      "step": 3636
    },
    {
      "epoch": 1.3430576070901035,
      "grad_norm": 0.26719626784324646,
      "learning_rate": 0.00011053085355339328,
      "loss": 0.2272,
      "step": 3637
    },
    {
      "epoch": 1.3434268833087148,
      "grad_norm": 0.25453051924705505,
      "learning_rate": 0.00011050621997782979,
      "loss": 0.2172,
      "step": 3638
    },
    {
      "epoch": 1.3437961595273265,
      "grad_norm": 0.2616005539894104,
      "learning_rate": 0.00011048158640226629,
      "loss": 0.2432,
      "step": 3639
    },
    {
      "epoch": 1.3441654357459378,
      "grad_norm": 0.2986209988594055,
      "learning_rate": 0.0001104569528267028,
      "loss": 0.2212,
      "step": 3640
    },
    {
      "epoch": 1.3445347119645494,
      "grad_norm": 0.20458440482616425,
      "learning_rate": 0.00011043231925113931,
      "loss": 0.1946,
      "step": 3641
    },
    {
      "epoch": 1.344903988183161,
      "grad_norm": 0.2503405511379242,
      "learning_rate": 0.00011040768567557582,
      "loss": 0.2067,
      "step": 3642
    },
    {
      "epoch": 1.3452732644017726,
      "grad_norm": 0.2201780378818512,
      "learning_rate": 0.00011038305210001232,
      "loss": 0.1827,
      "step": 3643
    },
    {
      "epoch": 1.345642540620384,
      "grad_norm": 0.24748550355434418,
      "learning_rate": 0.00011035841852444884,
      "loss": 0.2154,
      "step": 3644
    },
    {
      "epoch": 1.3460118168389956,
      "grad_norm": 0.30875056982040405,
      "learning_rate": 0.00011033378494888534,
      "loss": 0.2138,
      "step": 3645
    },
    {
      "epoch": 1.346381093057607,
      "grad_norm": 0.3330707848072052,
      "learning_rate": 0.00011030915137332185,
      "loss": 0.267,
      "step": 3646
    },
    {
      "epoch": 1.3467503692762186,
      "grad_norm": 0.2122422307729721,
      "learning_rate": 0.00011028451779775834,
      "loss": 0.1964,
      "step": 3647
    },
    {
      "epoch": 1.3471196454948302,
      "grad_norm": 0.33327516913414,
      "learning_rate": 0.00011025988422219487,
      "loss": 0.2413,
      "step": 3648
    },
    {
      "epoch": 1.3474889217134416,
      "grad_norm": 0.2685951292514801,
      "learning_rate": 0.00011023525064663136,
      "loss": 0.2271,
      "step": 3649
    },
    {
      "epoch": 1.3478581979320532,
      "grad_norm": 0.2989233434200287,
      "learning_rate": 0.00011021061707106789,
      "loss": 0.2366,
      "step": 3650
    },
    {
      "epoch": 1.3478581979320532,
      "eval_loss": 0.27142253518104553,
      "eval_runtime": 5.8581,
      "eval_samples_per_second": 8.535,
      "eval_steps_per_second": 1.195,
      "step": 3650
    },
    {
      "epoch": 1.3482274741506646,
      "grad_norm": 0.31434720754623413,
      "learning_rate": 0.00011018598349550437,
      "loss": 0.2186,
      "step": 3651
    },
    {
      "epoch": 1.3485967503692762,
      "grad_norm": 0.2665393054485321,
      "learning_rate": 0.00011016134991994089,
      "loss": 0.2416,
      "step": 3652
    },
    {
      "epoch": 1.3489660265878878,
      "grad_norm": 0.251358300447464,
      "learning_rate": 0.00011013671634437739,
      "loss": 0.2153,
      "step": 3653
    },
    {
      "epoch": 1.3493353028064994,
      "grad_norm": 0.2691832184791565,
      "learning_rate": 0.0001101120827688139,
      "loss": 0.2225,
      "step": 3654
    },
    {
      "epoch": 1.3497045790251108,
      "grad_norm": 0.25989165902137756,
      "learning_rate": 0.0001100874491932504,
      "loss": 0.2588,
      "step": 3655
    },
    {
      "epoch": 1.3500738552437224,
      "grad_norm": 0.31075742840766907,
      "learning_rate": 0.00011006281561768692,
      "loss": 0.267,
      "step": 3656
    },
    {
      "epoch": 1.3504431314623337,
      "grad_norm": 0.28866028785705566,
      "learning_rate": 0.00011003818204212342,
      "loss": 0.2414,
      "step": 3657
    },
    {
      "epoch": 1.3508124076809453,
      "grad_norm": 0.24472151696681976,
      "learning_rate": 0.00011001354846655993,
      "loss": 0.2098,
      "step": 3658
    },
    {
      "epoch": 1.351181683899557,
      "grad_norm": 0.28179073333740234,
      "learning_rate": 0.00010998891489099644,
      "loss": 0.2555,
      "step": 3659
    },
    {
      "epoch": 1.3515509601181683,
      "grad_norm": 0.33718785643577576,
      "learning_rate": 0.00010996428131543295,
      "loss": 0.3021,
      "step": 3660
    },
    {
      "epoch": 1.35192023633678,
      "grad_norm": 0.2833876311779022,
      "learning_rate": 0.00010993964773986945,
      "loss": 0.2451,
      "step": 3661
    },
    {
      "epoch": 1.3522895125553913,
      "grad_norm": 0.30380919575691223,
      "learning_rate": 0.00010991501416430597,
      "loss": 0.2152,
      "step": 3662
    },
    {
      "epoch": 1.352658788774003,
      "grad_norm": 0.2670554220676422,
      "learning_rate": 0.00010989038058874245,
      "loss": 0.2037,
      "step": 3663
    },
    {
      "epoch": 1.3530280649926145,
      "grad_norm": 0.2859307825565338,
      "learning_rate": 0.00010986574701317898,
      "loss": 0.2597,
      "step": 3664
    },
    {
      "epoch": 1.3533973412112261,
      "grad_norm": 0.27518805861473083,
      "learning_rate": 0.00010984111343761547,
      "loss": 0.2299,
      "step": 3665
    },
    {
      "epoch": 1.3537666174298375,
      "grad_norm": 0.2528857886791229,
      "learning_rate": 0.000109816479862052,
      "loss": 0.2177,
      "step": 3666
    },
    {
      "epoch": 1.354135893648449,
      "grad_norm": 0.31784820556640625,
      "learning_rate": 0.00010979184628648848,
      "loss": 0.2466,
      "step": 3667
    },
    {
      "epoch": 1.3545051698670605,
      "grad_norm": 0.2805560827255249,
      "learning_rate": 0.000109767212710925,
      "loss": 0.2369,
      "step": 3668
    },
    {
      "epoch": 1.354874446085672,
      "grad_norm": 0.30466175079345703,
      "learning_rate": 0.0001097425791353615,
      "loss": 0.2562,
      "step": 3669
    },
    {
      "epoch": 1.3552437223042837,
      "grad_norm": 0.2664591670036316,
      "learning_rate": 0.00010971794555979802,
      "loss": 0.2238,
      "step": 3670
    },
    {
      "epoch": 1.355612998522895,
      "grad_norm": 0.26514458656311035,
      "learning_rate": 0.00010969331198423452,
      "loss": 0.227,
      "step": 3671
    },
    {
      "epoch": 1.3559822747415067,
      "grad_norm": 0.31222182512283325,
      "learning_rate": 0.00010966867840867103,
      "loss": 0.2218,
      "step": 3672
    },
    {
      "epoch": 1.356351550960118,
      "grad_norm": 0.2709215581417084,
      "learning_rate": 0.00010964404483310753,
      "loss": 0.2398,
      "step": 3673
    },
    {
      "epoch": 1.3567208271787297,
      "grad_norm": 0.3307381272315979,
      "learning_rate": 0.00010961941125754405,
      "loss": 0.2217,
      "step": 3674
    },
    {
      "epoch": 1.3570901033973413,
      "grad_norm": 0.27952146530151367,
      "learning_rate": 0.00010959477768198055,
      "loss": 0.2679,
      "step": 3675
    },
    {
      "epoch": 1.3574593796159529,
      "grad_norm": 0.31884104013442993,
      "learning_rate": 0.00010957014410641706,
      "loss": 0.2754,
      "step": 3676
    },
    {
      "epoch": 1.3578286558345642,
      "grad_norm": 0.29868417978286743,
      "learning_rate": 0.00010954551053085356,
      "loss": 0.226,
      "step": 3677
    },
    {
      "epoch": 1.3581979320531758,
      "grad_norm": 0.24867242574691772,
      "learning_rate": 0.00010952087695529008,
      "loss": 0.2512,
      "step": 3678
    },
    {
      "epoch": 1.3585672082717872,
      "grad_norm": 0.2631646692752838,
      "learning_rate": 0.00010949624337972657,
      "loss": 0.2219,
      "step": 3679
    },
    {
      "epoch": 1.3589364844903988,
      "grad_norm": 0.2733980417251587,
      "learning_rate": 0.0001094716098041631,
      "loss": 0.2473,
      "step": 3680
    },
    {
      "epoch": 1.3593057607090104,
      "grad_norm": 0.2543134391307831,
      "learning_rate": 0.00010944697622859958,
      "loss": 0.2016,
      "step": 3681
    },
    {
      "epoch": 1.3596750369276218,
      "grad_norm": 0.2649345397949219,
      "learning_rate": 0.00010942234265303611,
      "loss": 0.1919,
      "step": 3682
    },
    {
      "epoch": 1.3600443131462334,
      "grad_norm": 0.3075491487979889,
      "learning_rate": 0.0001093977090774726,
      "loss": 0.2867,
      "step": 3683
    },
    {
      "epoch": 1.3604135893648448,
      "grad_norm": 0.2779380679130554,
      "learning_rate": 0.00010937307550190911,
      "loss": 0.2578,
      "step": 3684
    },
    {
      "epoch": 1.3607828655834564,
      "grad_norm": 0.29548507928848267,
      "learning_rate": 0.00010934844192634561,
      "loss": 0.2865,
      "step": 3685
    },
    {
      "epoch": 1.361152141802068,
      "grad_norm": 0.26643845438957214,
      "learning_rate": 0.00010932380835078213,
      "loss": 0.2224,
      "step": 3686
    },
    {
      "epoch": 1.3615214180206794,
      "grad_norm": 0.2854791581630707,
      "learning_rate": 0.00010929917477521863,
      "loss": 0.2617,
      "step": 3687
    },
    {
      "epoch": 1.361890694239291,
      "grad_norm": 0.20975206792354584,
      "learning_rate": 0.00010927454119965514,
      "loss": 0.1908,
      "step": 3688
    },
    {
      "epoch": 1.3622599704579026,
      "grad_norm": 0.26472845673561096,
      "learning_rate": 0.00010924990762409164,
      "loss": 0.2211,
      "step": 3689
    },
    {
      "epoch": 1.362629246676514,
      "grad_norm": 0.3006521463394165,
      "learning_rate": 0.00010922527404852816,
      "loss": 0.2687,
      "step": 3690
    },
    {
      "epoch": 1.3629985228951256,
      "grad_norm": 0.25495338439941406,
      "learning_rate": 0.00010920064047296466,
      "loss": 0.2094,
      "step": 3691
    },
    {
      "epoch": 1.3633677991137372,
      "grad_norm": 0.3352770507335663,
      "learning_rate": 0.00010917600689740117,
      "loss": 0.2512,
      "step": 3692
    },
    {
      "epoch": 1.3637370753323486,
      "grad_norm": 0.26214849948883057,
      "learning_rate": 0.00010915137332183766,
      "loss": 0.2353,
      "step": 3693
    },
    {
      "epoch": 1.3641063515509602,
      "grad_norm": 0.24108755588531494,
      "learning_rate": 0.00010912673974627419,
      "loss": 0.1749,
      "step": 3694
    },
    {
      "epoch": 1.3644756277695715,
      "grad_norm": 0.2967040538787842,
      "learning_rate": 0.00010910210617071068,
      "loss": 0.2393,
      "step": 3695
    },
    {
      "epoch": 1.3648449039881831,
      "grad_norm": 0.282380610704422,
      "learning_rate": 0.0001090774725951472,
      "loss": 0.2463,
      "step": 3696
    },
    {
      "epoch": 1.3652141802067947,
      "grad_norm": 0.2374754101037979,
      "learning_rate": 0.0001090528390195837,
      "loss": 0.2207,
      "step": 3697
    },
    {
      "epoch": 1.3655834564254061,
      "grad_norm": 0.2998552620410919,
      "learning_rate": 0.00010902820544402021,
      "loss": 0.2802,
      "step": 3698
    },
    {
      "epoch": 1.3659527326440177,
      "grad_norm": 0.2999780774116516,
      "learning_rate": 0.00010900357186845671,
      "loss": 0.2614,
      "step": 3699
    },
    {
      "epoch": 1.3663220088626291,
      "grad_norm": 0.31329798698425293,
      "learning_rate": 0.00010897893829289322,
      "loss": 0.2861,
      "step": 3700
    },
    {
      "epoch": 1.3663220088626291,
      "eval_loss": 0.27130240201950073,
      "eval_runtime": 5.86,
      "eval_samples_per_second": 8.532,
      "eval_steps_per_second": 1.195,
      "step": 3700
    },
    {
      "epoch": 1.3666912850812407,
      "grad_norm": 0.2652784287929535,
      "learning_rate": 0.00010895430471732973,
      "loss": 0.2767,
      "step": 3701
    },
    {
      "epoch": 1.3670605612998523,
      "grad_norm": 0.32752612233161926,
      "learning_rate": 0.00010892967114176624,
      "loss": 0.2668,
      "step": 3702
    },
    {
      "epoch": 1.367429837518464,
      "grad_norm": 0.32744166254997253,
      "learning_rate": 0.00010890503756620274,
      "loss": 0.2559,
      "step": 3703
    },
    {
      "epoch": 1.3677991137370753,
      "grad_norm": 0.2567864656448364,
      "learning_rate": 0.00010888040399063926,
      "loss": 0.2455,
      "step": 3704
    },
    {
      "epoch": 1.368168389955687,
      "grad_norm": 0.3054686188697815,
      "learning_rate": 0.00010885577041507576,
      "loss": 0.2589,
      "step": 3705
    },
    {
      "epoch": 1.3685376661742983,
      "grad_norm": 0.27559489011764526,
      "learning_rate": 0.00010883113683951227,
      "loss": 0.2532,
      "step": 3706
    },
    {
      "epoch": 1.3689069423929099,
      "grad_norm": 0.26456356048583984,
      "learning_rate": 0.00010880650326394877,
      "loss": 0.2241,
      "step": 3707
    },
    {
      "epoch": 1.3692762186115215,
      "grad_norm": 0.2512317895889282,
      "learning_rate": 0.00010878186968838529,
      "loss": 0.2225,
      "step": 3708
    },
    {
      "epoch": 1.3696454948301329,
      "grad_norm": 0.2673681974411011,
      "learning_rate": 0.00010875723611282177,
      "loss": 0.2313,
      "step": 3709
    },
    {
      "epoch": 1.3700147710487445,
      "grad_norm": 0.30007287859916687,
      "learning_rate": 0.0001087326025372583,
      "loss": 0.2213,
      "step": 3710
    },
    {
      "epoch": 1.3703840472673559,
      "grad_norm": 0.3304269313812256,
      "learning_rate": 0.00010870796896169479,
      "loss": 0.2427,
      "step": 3711
    },
    {
      "epoch": 1.3707533234859675,
      "grad_norm": 0.2871852517127991,
      "learning_rate": 0.00010868333538613132,
      "loss": 0.2533,
      "step": 3712
    },
    {
      "epoch": 1.371122599704579,
      "grad_norm": 0.23127683997154236,
      "learning_rate": 0.0001086587018105678,
      "loss": 0.2301,
      "step": 3713
    },
    {
      "epoch": 1.3714918759231907,
      "grad_norm": 0.31266435980796814,
      "learning_rate": 0.00010863406823500432,
      "loss": 0.2789,
      "step": 3714
    },
    {
      "epoch": 1.371861152141802,
      "grad_norm": 0.28091543912887573,
      "learning_rate": 0.00010860943465944082,
      "loss": 0.2336,
      "step": 3715
    },
    {
      "epoch": 1.3722304283604136,
      "grad_norm": 0.3167265057563782,
      "learning_rate": 0.00010858480108387734,
      "loss": 0.2452,
      "step": 3716
    },
    {
      "epoch": 1.372599704579025,
      "grad_norm": 0.3556848466396332,
      "learning_rate": 0.00010856016750831384,
      "loss": 0.3206,
      "step": 3717
    },
    {
      "epoch": 1.3729689807976366,
      "grad_norm": 0.490351140499115,
      "learning_rate": 0.00010853553393275034,
      "loss": 0.2759,
      "step": 3718
    },
    {
      "epoch": 1.3733382570162482,
      "grad_norm": 0.29599419236183167,
      "learning_rate": 0.00010851090035718685,
      "loss": 0.2617,
      "step": 3719
    },
    {
      "epoch": 1.3737075332348596,
      "grad_norm": 0.2695614993572235,
      "learning_rate": 0.00010848626678162334,
      "loss": 0.2742,
      "step": 3720
    },
    {
      "epoch": 1.3740768094534712,
      "grad_norm": 0.27813664078712463,
      "learning_rate": 0.00010846163320605987,
      "loss": 0.2292,
      "step": 3721
    },
    {
      "epoch": 1.3744460856720826,
      "grad_norm": 0.21287007629871368,
      "learning_rate": 0.00010843699963049636,
      "loss": 0.1806,
      "step": 3722
    },
    {
      "epoch": 1.3748153618906942,
      "grad_norm": 0.26846447587013245,
      "learning_rate": 0.00010841236605493288,
      "loss": 0.2734,
      "step": 3723
    },
    {
      "epoch": 1.3751846381093058,
      "grad_norm": 0.2294163703918457,
      "learning_rate": 0.00010838773247936937,
      "loss": 0.2152,
      "step": 3724
    },
    {
      "epoch": 1.3755539143279174,
      "grad_norm": 0.3586331009864807,
      "learning_rate": 0.00010836309890380589,
      "loss": 0.2278,
      "step": 3725
    },
    {
      "epoch": 1.3759231905465288,
      "grad_norm": 0.254730224609375,
      "learning_rate": 0.00010833846532824239,
      "loss": 0.2161,
      "step": 3726
    },
    {
      "epoch": 1.3762924667651404,
      "grad_norm": 0.23388056457042694,
      "learning_rate": 0.0001083138317526789,
      "loss": 0.2165,
      "step": 3727
    },
    {
      "epoch": 1.3766617429837518,
      "grad_norm": 0.2615768015384674,
      "learning_rate": 0.0001082891981771154,
      "loss": 0.2066,
      "step": 3728
    },
    {
      "epoch": 1.3770310192023634,
      "grad_norm": 0.2843790650367737,
      "learning_rate": 0.00010826456460155192,
      "loss": 0.2206,
      "step": 3729
    },
    {
      "epoch": 1.377400295420975,
      "grad_norm": 0.31225937604904175,
      "learning_rate": 0.00010823993102598842,
      "loss": 0.2974,
      "step": 3730
    },
    {
      "epoch": 1.3777695716395864,
      "grad_norm": 0.25193703174591064,
      "learning_rate": 0.00010821529745042493,
      "loss": 0.2215,
      "step": 3731
    },
    {
      "epoch": 1.378138847858198,
      "grad_norm": 0.2730855941772461,
      "learning_rate": 0.00010819066387486143,
      "loss": 0.2546,
      "step": 3732
    },
    {
      "epoch": 1.3785081240768093,
      "grad_norm": 0.21629397571086884,
      "learning_rate": 0.00010816603029929795,
      "loss": 0.2282,
      "step": 3733
    },
    {
      "epoch": 1.378877400295421,
      "grad_norm": 0.30740419030189514,
      "learning_rate": 0.00010814139672373445,
      "loss": 0.26,
      "step": 3734
    },
    {
      "epoch": 1.3792466765140325,
      "grad_norm": 0.2641306221485138,
      "learning_rate": 0.00010811676314817097,
      "loss": 0.2159,
      "step": 3735
    },
    {
      "epoch": 1.3796159527326441,
      "grad_norm": 0.32722675800323486,
      "learning_rate": 0.00010809212957260745,
      "loss": 0.2639,
      "step": 3736
    },
    {
      "epoch": 1.3799852289512555,
      "grad_norm": 0.29486608505249023,
      "learning_rate": 0.00010806749599704398,
      "loss": 0.2597,
      "step": 3737
    },
    {
      "epoch": 1.3803545051698671,
      "grad_norm": 0.30441945791244507,
      "learning_rate": 0.00010804286242148047,
      "loss": 0.2393,
      "step": 3738
    },
    {
      "epoch": 1.3807237813884785,
      "grad_norm": 0.26642045378685,
      "learning_rate": 0.000108018228845917,
      "loss": 0.2471,
      "step": 3739
    },
    {
      "epoch": 1.3810930576070901,
      "grad_norm": 0.28097429871559143,
      "learning_rate": 0.00010799359527035348,
      "loss": 0.2535,
      "step": 3740
    },
    {
      "epoch": 1.3814623338257017,
      "grad_norm": 0.23791688680648804,
      "learning_rate": 0.00010796896169479,
      "loss": 0.197,
      "step": 3741
    },
    {
      "epoch": 1.381831610044313,
      "grad_norm": 0.24202603101730347,
      "learning_rate": 0.0001079443281192265,
      "loss": 0.1952,
      "step": 3742
    },
    {
      "epoch": 1.3822008862629247,
      "grad_norm": 0.24361658096313477,
      "learning_rate": 0.00010791969454366301,
      "loss": 0.2197,
      "step": 3743
    },
    {
      "epoch": 1.382570162481536,
      "grad_norm": 0.3844248652458191,
      "learning_rate": 0.00010789506096809952,
      "loss": 0.2484,
      "step": 3744
    },
    {
      "epoch": 1.3829394387001477,
      "grad_norm": 0.26775461435317993,
      "learning_rate": 0.00010787042739253603,
      "loss": 0.2645,
      "step": 3745
    },
    {
      "epoch": 1.3833087149187593,
      "grad_norm": 0.31835201382637024,
      "learning_rate": 0.00010784579381697253,
      "loss": 0.2766,
      "step": 3746
    },
    {
      "epoch": 1.3836779911373709,
      "grad_norm": 0.2494579553604126,
      "learning_rate": 0.00010782116024140905,
      "loss": 0.2101,
      "step": 3747
    },
    {
      "epoch": 1.3840472673559823,
      "grad_norm": 0.24214380979537964,
      "learning_rate": 0.00010779652666584555,
      "loss": 0.1978,
      "step": 3748
    },
    {
      "epoch": 1.3844165435745939,
      "grad_norm": 0.2529049813747406,
      "learning_rate": 0.00010777189309028206,
      "loss": 0.2452,
      "step": 3749
    },
    {
      "epoch": 1.3847858197932053,
      "grad_norm": 0.30903905630111694,
      "learning_rate": 0.00010774725951471856,
      "loss": 0.2745,
      "step": 3750
    },
    {
      "epoch": 1.3847858197932053,
      "eval_loss": 0.2686930000782013,
      "eval_runtime": 5.861,
      "eval_samples_per_second": 8.531,
      "eval_steps_per_second": 1.194,
      "step": 3750
    },
    {
      "epoch": 1.3851550960118169,
      "grad_norm": 0.3068532943725586,
      "learning_rate": 0.00010772262593915508,
      "loss": 0.253,
      "step": 3751
    },
    {
      "epoch": 1.3855243722304285,
      "grad_norm": 0.26122722029685974,
      "learning_rate": 0.00010769799236359157,
      "loss": 0.2271,
      "step": 3752
    },
    {
      "epoch": 1.3858936484490398,
      "grad_norm": 0.2446070909500122,
      "learning_rate": 0.0001076733587880281,
      "loss": 0.2386,
      "step": 3753
    },
    {
      "epoch": 1.3862629246676514,
      "grad_norm": 0.30802103877067566,
      "learning_rate": 0.00010764872521246458,
      "loss": 0.2234,
      "step": 3754
    },
    {
      "epoch": 1.3866322008862628,
      "grad_norm": 0.26052504777908325,
      "learning_rate": 0.00010762409163690111,
      "loss": 0.2106,
      "step": 3755
    },
    {
      "epoch": 1.3870014771048744,
      "grad_norm": 0.275816410779953,
      "learning_rate": 0.0001075994580613376,
      "loss": 0.2401,
      "step": 3756
    },
    {
      "epoch": 1.387370753323486,
      "grad_norm": 0.32631778717041016,
      "learning_rate": 0.00010757482448577411,
      "loss": 0.2508,
      "step": 3757
    },
    {
      "epoch": 1.3877400295420974,
      "grad_norm": 0.2649644613265991,
      "learning_rate": 0.00010755019091021061,
      "loss": 0.2446,
      "step": 3758
    },
    {
      "epoch": 1.388109305760709,
      "grad_norm": 0.29543444514274597,
      "learning_rate": 0.00010752555733464713,
      "loss": 0.2917,
      "step": 3759
    },
    {
      "epoch": 1.3884785819793206,
      "grad_norm": 0.26043254137039185,
      "learning_rate": 0.00010750092375908363,
      "loss": 0.2437,
      "step": 3760
    },
    {
      "epoch": 1.388847858197932,
      "grad_norm": 0.29932519793510437,
      "learning_rate": 0.00010747629018352014,
      "loss": 0.3138,
      "step": 3761
    },
    {
      "epoch": 1.3892171344165436,
      "grad_norm": 0.26750093698501587,
      "learning_rate": 0.00010745165660795664,
      "loss": 0.2127,
      "step": 3762
    },
    {
      "epoch": 1.3895864106351552,
      "grad_norm": 0.25077375769615173,
      "learning_rate": 0.00010742702303239316,
      "loss": 0.2429,
      "step": 3763
    },
    {
      "epoch": 1.3899556868537666,
      "grad_norm": 0.3207470774650574,
      "learning_rate": 0.00010740238945682966,
      "loss": 0.2614,
      "step": 3764
    },
    {
      "epoch": 1.3903249630723782,
      "grad_norm": 0.2767865061759949,
      "learning_rate": 0.00010737775588126617,
      "loss": 0.2872,
      "step": 3765
    },
    {
      "epoch": 1.3906942392909896,
      "grad_norm": 0.2964353859424591,
      "learning_rate": 0.00010735312230570268,
      "loss": 0.2995,
      "step": 3766
    },
    {
      "epoch": 1.3910635155096012,
      "grad_norm": 0.25089460611343384,
      "learning_rate": 0.00010732848873013919,
      "loss": 0.2297,
      "step": 3767
    },
    {
      "epoch": 1.3914327917282128,
      "grad_norm": 0.2735138237476349,
      "learning_rate": 0.00010730385515457568,
      "loss": 0.2655,
      "step": 3768
    },
    {
      "epoch": 1.3918020679468242,
      "grad_norm": 0.26005735993385315,
      "learning_rate": 0.0001072792215790122,
      "loss": 0.256,
      "step": 3769
    },
    {
      "epoch": 1.3921713441654358,
      "grad_norm": 0.4221028983592987,
      "learning_rate": 0.00010725458800344869,
      "loss": 0.2676,
      "step": 3770
    },
    {
      "epoch": 1.3925406203840471,
      "grad_norm": 0.23463353514671326,
      "learning_rate": 0.00010722995442788522,
      "loss": 0.2255,
      "step": 3771
    },
    {
      "epoch": 1.3929098966026587,
      "grad_norm": 0.2788041830062866,
      "learning_rate": 0.00010720532085232171,
      "loss": 0.2473,
      "step": 3772
    },
    {
      "epoch": 1.3932791728212703,
      "grad_norm": 0.2994675934314728,
      "learning_rate": 0.00010718068727675822,
      "loss": 0.2383,
      "step": 3773
    },
    {
      "epoch": 1.393648449039882,
      "grad_norm": 0.23976139724254608,
      "learning_rate": 0.00010715605370119472,
      "loss": 0.2157,
      "step": 3774
    },
    {
      "epoch": 1.3940177252584933,
      "grad_norm": 0.3295306861400604,
      "learning_rate": 0.00010713142012563124,
      "loss": 0.3445,
      "step": 3775
    },
    {
      "epoch": 1.394387001477105,
      "grad_norm": 0.2559310793876648,
      "learning_rate": 0.00010710678655006774,
      "loss": 0.2262,
      "step": 3776
    },
    {
      "epoch": 1.3947562776957163,
      "grad_norm": 0.2468082755804062,
      "learning_rate": 0.00010708215297450425,
      "loss": 0.2519,
      "step": 3777
    },
    {
      "epoch": 1.395125553914328,
      "grad_norm": 0.4029478430747986,
      "learning_rate": 0.00010705751939894076,
      "loss": 0.2463,
      "step": 3778
    },
    {
      "epoch": 1.3954948301329395,
      "grad_norm": 0.33493492007255554,
      "learning_rate": 0.00010703288582337727,
      "loss": 0.2887,
      "step": 3779
    },
    {
      "epoch": 1.395864106351551,
      "grad_norm": 0.30173709988594055,
      "learning_rate": 0.00010700825224781377,
      "loss": 0.2067,
      "step": 3780
    },
    {
      "epoch": 1.3962333825701625,
      "grad_norm": 0.2795202136039734,
      "learning_rate": 0.00010698361867225029,
      "loss": 0.2169,
      "step": 3781
    },
    {
      "epoch": 1.3966026587887739,
      "grad_norm": 0.2911227345466614,
      "learning_rate": 0.00010695898509668679,
      "loss": 0.2852,
      "step": 3782
    },
    {
      "epoch": 1.3969719350073855,
      "grad_norm": 0.25937220454216003,
      "learning_rate": 0.0001069343515211233,
      "loss": 0.2537,
      "step": 3783
    },
    {
      "epoch": 1.397341211225997,
      "grad_norm": 0.2779325842857361,
      "learning_rate": 0.00010690971794555979,
      "loss": 0.2231,
      "step": 3784
    },
    {
      "epoch": 1.3977104874446087,
      "grad_norm": 0.29453492164611816,
      "learning_rate": 0.00010688508436999632,
      "loss": 0.2488,
      "step": 3785
    },
    {
      "epoch": 1.39807976366322,
      "grad_norm": 0.2754989266395569,
      "learning_rate": 0.0001068604507944328,
      "loss": 0.2496,
      "step": 3786
    },
    {
      "epoch": 1.3984490398818317,
      "grad_norm": 0.31683996319770813,
      "learning_rate": 0.00010683581721886933,
      "loss": 0.2838,
      "step": 3787
    },
    {
      "epoch": 1.398818316100443,
      "grad_norm": 0.23349718749523163,
      "learning_rate": 0.00010681118364330582,
      "loss": 0.2114,
      "step": 3788
    },
    {
      "epoch": 1.3991875923190547,
      "grad_norm": 0.30497369170188904,
      "learning_rate": 0.00010678655006774234,
      "loss": 0.2579,
      "step": 3789
    },
    {
      "epoch": 1.3995568685376663,
      "grad_norm": 0.2672632932662964,
      "learning_rate": 0.00010676191649217884,
      "loss": 0.236,
      "step": 3790
    },
    {
      "epoch": 1.3999261447562776,
      "grad_norm": 0.25800979137420654,
      "learning_rate": 0.00010673728291661535,
      "loss": 0.2195,
      "step": 3791
    },
    {
      "epoch": 1.4002954209748892,
      "grad_norm": 0.29944947361946106,
      "learning_rate": 0.00010671264934105185,
      "loss": 0.269,
      "step": 3792
    },
    {
      "epoch": 1.4006646971935006,
      "grad_norm": 0.23728716373443604,
      "learning_rate": 0.00010668801576548837,
      "loss": 0.232,
      "step": 3793
    },
    {
      "epoch": 1.4010339734121122,
      "grad_norm": 0.27027857303619385,
      "learning_rate": 0.00010666338218992487,
      "loss": 0.2326,
      "step": 3794
    },
    {
      "epoch": 1.4014032496307238,
      "grad_norm": 0.3711899220943451,
      "learning_rate": 0.00010663874861436138,
      "loss": 0.2778,
      "step": 3795
    },
    {
      "epoch": 1.4017725258493354,
      "grad_norm": 0.3063708543777466,
      "learning_rate": 0.00010661411503879788,
      "loss": 0.2505,
      "step": 3796
    },
    {
      "epoch": 1.4021418020679468,
      "grad_norm": 0.3206406533718109,
      "learning_rate": 0.0001065894814632344,
      "loss": 0.2306,
      "step": 3797
    },
    {
      "epoch": 1.4025110782865584,
      "grad_norm": 0.28478190302848816,
      "learning_rate": 0.0001065648478876709,
      "loss": 0.225,
      "step": 3798
    },
    {
      "epoch": 1.4028803545051698,
      "grad_norm": 0.25216224789619446,
      "learning_rate": 0.00010654021431210741,
      "loss": 0.2308,
      "step": 3799
    },
    {
      "epoch": 1.4032496307237814,
      "grad_norm": 0.2996431589126587,
      "learning_rate": 0.0001065155807365439,
      "loss": 0.2577,
      "step": 3800
    },
    {
      "epoch": 1.4032496307237814,
      "eval_loss": 0.2693694233894348,
      "eval_runtime": 5.8472,
      "eval_samples_per_second": 8.551,
      "eval_steps_per_second": 1.197,
      "step": 3800
    },
    {
      "epoch": 1.403618906942393,
      "grad_norm": 0.35670316219329834,
      "learning_rate": 0.00010649094716098043,
      "loss": 0.2303,
      "step": 3801
    },
    {
      "epoch": 1.4039881831610044,
      "grad_norm": 0.23958542943000793,
      "learning_rate": 0.00010646631358541692,
      "loss": 0.2306,
      "step": 3802
    },
    {
      "epoch": 1.404357459379616,
      "grad_norm": 0.32521572709083557,
      "learning_rate": 0.00010644168000985345,
      "loss": 0.2521,
      "step": 3803
    },
    {
      "epoch": 1.4047267355982274,
      "grad_norm": 0.2848784327507019,
      "learning_rate": 0.00010641704643428993,
      "loss": 0.2835,
      "step": 3804
    },
    {
      "epoch": 1.405096011816839,
      "grad_norm": 0.2572305500507355,
      "learning_rate": 0.00010639241285872645,
      "loss": 0.2469,
      "step": 3805
    },
    {
      "epoch": 1.4054652880354506,
      "grad_norm": 0.3307071626186371,
      "learning_rate": 0.00010636777928316295,
      "loss": 0.2501,
      "step": 3806
    },
    {
      "epoch": 1.4058345642540622,
      "grad_norm": 0.2324734479188919,
      "learning_rate": 0.00010634314570759946,
      "loss": 0.2143,
      "step": 3807
    },
    {
      "epoch": 1.4062038404726735,
      "grad_norm": 0.2600086033344269,
      "learning_rate": 0.00010631851213203596,
      "loss": 0.2514,
      "step": 3808
    },
    {
      "epoch": 1.4065731166912852,
      "grad_norm": 0.25537365674972534,
      "learning_rate": 0.00010629387855647248,
      "loss": 0.222,
      "step": 3809
    },
    {
      "epoch": 1.4069423929098965,
      "grad_norm": 0.2847685217857361,
      "learning_rate": 0.00010626924498090898,
      "loss": 0.2719,
      "step": 3810
    },
    {
      "epoch": 1.4073116691285081,
      "grad_norm": 0.21500246226787567,
      "learning_rate": 0.0001062446114053455,
      "loss": 0.1824,
      "step": 3811
    },
    {
      "epoch": 1.4076809453471197,
      "grad_norm": 0.3191910982131958,
      "learning_rate": 0.000106219977829782,
      "loss": 0.2317,
      "step": 3812
    },
    {
      "epoch": 1.4080502215657311,
      "grad_norm": 0.22762149572372437,
      "learning_rate": 0.00010619534425421851,
      "loss": 0.217,
      "step": 3813
    },
    {
      "epoch": 1.4084194977843427,
      "grad_norm": 0.23422375321388245,
      "learning_rate": 0.00010617071067865501,
      "loss": 0.237,
      "step": 3814
    },
    {
      "epoch": 1.408788774002954,
      "grad_norm": 0.2757764458656311,
      "learning_rate": 0.00010614607710309153,
      "loss": 0.2148,
      "step": 3815
    },
    {
      "epoch": 1.4091580502215657,
      "grad_norm": 0.5969408750534058,
      "learning_rate": 0.00010612144352752801,
      "loss": 0.285,
      "step": 3816
    },
    {
      "epoch": 1.4095273264401773,
      "grad_norm": 0.3429873585700989,
      "learning_rate": 0.00010609680995196454,
      "loss": 0.2885,
      "step": 3817
    },
    {
      "epoch": 1.409896602658789,
      "grad_norm": 0.2952936887741089,
      "learning_rate": 0.00010607217637640103,
      "loss": 0.2207,
      "step": 3818
    },
    {
      "epoch": 1.4102658788774003,
      "grad_norm": 0.273403137922287,
      "learning_rate": 0.00010604754280083756,
      "loss": 0.2558,
      "step": 3819
    },
    {
      "epoch": 1.410635155096012,
      "grad_norm": 0.22843249142169952,
      "learning_rate": 0.00010602290922527405,
      "loss": 0.1915,
      "step": 3820
    },
    {
      "epoch": 1.4110044313146233,
      "grad_norm": 0.24980376660823822,
      "learning_rate": 0.00010599827564971056,
      "loss": 0.2318,
      "step": 3821
    },
    {
      "epoch": 1.4113737075332349,
      "grad_norm": 0.2425321340560913,
      "learning_rate": 0.00010597364207414706,
      "loss": 0.2232,
      "step": 3822
    },
    {
      "epoch": 1.4117429837518465,
      "grad_norm": 0.2785226106643677,
      "learning_rate": 0.00010594900849858358,
      "loss": 0.2312,
      "step": 3823
    },
    {
      "epoch": 1.4121122599704579,
      "grad_norm": 0.22242644429206848,
      "learning_rate": 0.00010592437492302008,
      "loss": 0.1902,
      "step": 3824
    },
    {
      "epoch": 1.4124815361890695,
      "grad_norm": 0.26450857520103455,
      "learning_rate": 0.00010589974134745659,
      "loss": 0.2219,
      "step": 3825
    },
    {
      "epoch": 1.4128508124076808,
      "grad_norm": 0.26558247208595276,
      "learning_rate": 0.00010587510777189309,
      "loss": 0.2296,
      "step": 3826
    },
    {
      "epoch": 1.4132200886262924,
      "grad_norm": 0.2665616571903229,
      "learning_rate": 0.00010585047419632961,
      "loss": 0.2357,
      "step": 3827
    },
    {
      "epoch": 1.413589364844904,
      "grad_norm": 0.24615925550460815,
      "learning_rate": 0.00010582584062076611,
      "loss": 0.233,
      "step": 3828
    },
    {
      "epoch": 1.4139586410635154,
      "grad_norm": 0.25677865743637085,
      "learning_rate": 0.00010580120704520262,
      "loss": 0.2011,
      "step": 3829
    },
    {
      "epoch": 1.414327917282127,
      "grad_norm": 0.25913333892822266,
      "learning_rate": 0.00010577657346963912,
      "loss": 0.2396,
      "step": 3830
    },
    {
      "epoch": 1.4146971935007384,
      "grad_norm": 0.3277101218700409,
      "learning_rate": 0.00010575193989407564,
      "loss": 0.2446,
      "step": 3831
    },
    {
      "epoch": 1.41506646971935,
      "grad_norm": 0.23574373126029968,
      "learning_rate": 0.00010572730631851213,
      "loss": 0.2325,
      "step": 3832
    },
    {
      "epoch": 1.4154357459379616,
      "grad_norm": 0.25479990243911743,
      "learning_rate": 0.00010570267274294865,
      "loss": 0.2242,
      "step": 3833
    },
    {
      "epoch": 1.4158050221565732,
      "grad_norm": 0.2911861538887024,
      "learning_rate": 0.00010567803916738514,
      "loss": 0.2126,
      "step": 3834
    },
    {
      "epoch": 1.4161742983751846,
      "grad_norm": 0.24522805213928223,
      "learning_rate": 0.00010565340559182167,
      "loss": 0.2366,
      "step": 3835
    },
    {
      "epoch": 1.4165435745937962,
      "grad_norm": 0.2456912100315094,
      "learning_rate": 0.00010562877201625816,
      "loss": 0.2008,
      "step": 3836
    },
    {
      "epoch": 1.4169128508124076,
      "grad_norm": 0.24651378393173218,
      "learning_rate": 0.00010560413844069467,
      "loss": 0.2032,
      "step": 3837
    },
    {
      "epoch": 1.4172821270310192,
      "grad_norm": 0.28535136580467224,
      "learning_rate": 0.00010557950486513117,
      "loss": 0.2572,
      "step": 3838
    },
    {
      "epoch": 1.4176514032496308,
      "grad_norm": 0.3038933277130127,
      "learning_rate": 0.00010555487128956769,
      "loss": 0.2531,
      "step": 3839
    },
    {
      "epoch": 1.4180206794682422,
      "grad_norm": 0.27011793851852417,
      "learning_rate": 0.00010553023771400419,
      "loss": 0.2549,
      "step": 3840
    },
    {
      "epoch": 1.4183899556868538,
      "grad_norm": 0.21485698223114014,
      "learning_rate": 0.0001055056041384407,
      "loss": 0.1899,
      "step": 3841
    },
    {
      "epoch": 1.4187592319054652,
      "grad_norm": 0.3180251717567444,
      "learning_rate": 0.0001054809705628772,
      "loss": 0.2393,
      "step": 3842
    },
    {
      "epoch": 1.4191285081240768,
      "grad_norm": 0.2945382297039032,
      "learning_rate": 0.00010545633698731372,
      "loss": 0.2455,
      "step": 3843
    },
    {
      "epoch": 1.4194977843426884,
      "grad_norm": 0.29279324412345886,
      "learning_rate": 0.00010543170341175022,
      "loss": 0.251,
      "step": 3844
    },
    {
      "epoch": 1.4198670605613,
      "grad_norm": 0.30234163999557495,
      "learning_rate": 0.00010540706983618674,
      "loss": 0.2545,
      "step": 3845
    },
    {
      "epoch": 1.4202363367799113,
      "grad_norm": 0.23178504407405853,
      "learning_rate": 0.00010538243626062322,
      "loss": 0.2203,
      "step": 3846
    },
    {
      "epoch": 1.420605612998523,
      "grad_norm": 0.24344073235988617,
      "learning_rate": 0.00010535780268505975,
      "loss": 0.2348,
      "step": 3847
    },
    {
      "epoch": 1.4209748892171343,
      "grad_norm": 0.2559746503829956,
      "learning_rate": 0.00010533316910949624,
      "loss": 0.2093,
      "step": 3848
    },
    {
      "epoch": 1.421344165435746,
      "grad_norm": 0.3378274738788605,
      "learning_rate": 0.00010530853553393277,
      "loss": 0.2978,
      "step": 3849
    },
    {
      "epoch": 1.4217134416543575,
      "grad_norm": 0.26383623480796814,
      "learning_rate": 0.00010528390195836925,
      "loss": 0.2372,
      "step": 3850
    },
    {
      "epoch": 1.4217134416543575,
      "eval_loss": 0.2668742835521698,
      "eval_runtime": 5.8577,
      "eval_samples_per_second": 8.536,
      "eval_steps_per_second": 1.195,
      "step": 3850
    },
    {
      "epoch": 1.422082717872969,
      "grad_norm": 0.27024218440055847,
      "learning_rate": 0.00010525926838280577,
      "loss": 0.2198,
      "step": 3851
    },
    {
      "epoch": 1.4224519940915805,
      "grad_norm": 0.2606055736541748,
      "learning_rate": 0.00010523463480724227,
      "loss": 0.2284,
      "step": 3852
    },
    {
      "epoch": 1.422821270310192,
      "grad_norm": 0.2458440065383911,
      "learning_rate": 0.00010521000123167878,
      "loss": 0.1999,
      "step": 3853
    },
    {
      "epoch": 1.4231905465288035,
      "grad_norm": 0.36551737785339355,
      "learning_rate": 0.00010518536765611529,
      "loss": 0.2538,
      "step": 3854
    },
    {
      "epoch": 1.423559822747415,
      "grad_norm": 0.30359822511672974,
      "learning_rate": 0.0001051607340805518,
      "loss": 0.2623,
      "step": 3855
    },
    {
      "epoch": 1.4239290989660267,
      "grad_norm": 0.19720108807086945,
      "learning_rate": 0.0001051361005049883,
      "loss": 0.1671,
      "step": 3856
    },
    {
      "epoch": 1.424298375184638,
      "grad_norm": 0.31897875666618347,
      "learning_rate": 0.00010511146692942482,
      "loss": 0.2078,
      "step": 3857
    },
    {
      "epoch": 1.4246676514032497,
      "grad_norm": 0.2386259138584137,
      "learning_rate": 0.00010508683335386132,
      "loss": 0.2147,
      "step": 3858
    },
    {
      "epoch": 1.425036927621861,
      "grad_norm": 0.2897254228591919,
      "learning_rate": 0.00010506219977829783,
      "loss": 0.2349,
      "step": 3859
    },
    {
      "epoch": 1.4254062038404727,
      "grad_norm": 0.3066774308681488,
      "learning_rate": 0.00010503756620273433,
      "loss": 0.241,
      "step": 3860
    },
    {
      "epoch": 1.4257754800590843,
      "grad_norm": 0.30762627720832825,
      "learning_rate": 0.00010501293262717085,
      "loss": 0.2634,
      "step": 3861
    },
    {
      "epoch": 1.4261447562776957,
      "grad_norm": 0.2821105122566223,
      "learning_rate": 0.00010498829905160734,
      "loss": 0.2391,
      "step": 3862
    },
    {
      "epoch": 1.4265140324963073,
      "grad_norm": 0.365055650472641,
      "learning_rate": 0.00010496366547604386,
      "loss": 0.2756,
      "step": 3863
    },
    {
      "epoch": 1.4268833087149186,
      "grad_norm": 0.31532981991767883,
      "learning_rate": 0.00010493903190048035,
      "loss": 0.2579,
      "step": 3864
    },
    {
      "epoch": 1.4272525849335302,
      "grad_norm": 0.27469801902770996,
      "learning_rate": 0.00010491439832491688,
      "loss": 0.2225,
      "step": 3865
    },
    {
      "epoch": 1.4276218611521418,
      "grad_norm": 0.30740001797676086,
      "learning_rate": 0.00010488976474935337,
      "loss": 0.3066,
      "step": 3866
    },
    {
      "epoch": 1.4279911373707534,
      "grad_norm": 0.23712503910064697,
      "learning_rate": 0.00010486513117378988,
      "loss": 0.1868,
      "step": 3867
    },
    {
      "epoch": 1.4283604135893648,
      "grad_norm": 0.28384414315223694,
      "learning_rate": 0.00010484049759822638,
      "loss": 0.2263,
      "step": 3868
    },
    {
      "epoch": 1.4287296898079764,
      "grad_norm": 0.22514435648918152,
      "learning_rate": 0.0001048158640226629,
      "loss": 0.2147,
      "step": 3869
    },
    {
      "epoch": 1.4290989660265878,
      "grad_norm": 0.31152236461639404,
      "learning_rate": 0.0001047912304470994,
      "loss": 0.2477,
      "step": 3870
    },
    {
      "epoch": 1.4294682422451994,
      "grad_norm": 0.35805216431617737,
      "learning_rate": 0.00010476659687153591,
      "loss": 0.2623,
      "step": 3871
    },
    {
      "epoch": 1.429837518463811,
      "grad_norm": 0.2790660858154297,
      "learning_rate": 0.00010474196329597241,
      "loss": 0.2352,
      "step": 3872
    },
    {
      "epoch": 1.4302067946824224,
      "grad_norm": 0.2426290512084961,
      "learning_rate": 0.00010471732972040893,
      "loss": 0.2381,
      "step": 3873
    },
    {
      "epoch": 1.430576070901034,
      "grad_norm": 0.24818822741508484,
      "learning_rate": 0.00010469269614484543,
      "loss": 0.2023,
      "step": 3874
    },
    {
      "epoch": 1.4309453471196454,
      "grad_norm": 0.325644850730896,
      "learning_rate": 0.00010466806256928194,
      "loss": 0.2917,
      "step": 3875
    },
    {
      "epoch": 1.431314623338257,
      "grad_norm": 0.2357746958732605,
      "learning_rate": 0.00010464342899371845,
      "loss": 0.2195,
      "step": 3876
    },
    {
      "epoch": 1.4316838995568686,
      "grad_norm": 0.292472779750824,
      "learning_rate": 0.00010461879541815496,
      "loss": 0.209,
      "step": 3877
    },
    {
      "epoch": 1.4320531757754802,
      "grad_norm": 0.2933250367641449,
      "learning_rate": 0.00010459416184259145,
      "loss": 0.2442,
      "step": 3878
    },
    {
      "epoch": 1.4324224519940916,
      "grad_norm": 0.2687840461730957,
      "learning_rate": 0.00010456952826702798,
      "loss": 0.21,
      "step": 3879
    },
    {
      "epoch": 1.4327917282127032,
      "grad_norm": 0.33804115653038025,
      "learning_rate": 0.00010454489469146446,
      "loss": 0.2396,
      "step": 3880
    },
    {
      "epoch": 1.4331610044313146,
      "grad_norm": 0.25264784693717957,
      "learning_rate": 0.00010452026111590099,
      "loss": 0.239,
      "step": 3881
    },
    {
      "epoch": 1.4335302806499262,
      "grad_norm": 0.3141342103481293,
      "learning_rate": 0.00010449562754033748,
      "loss": 0.2376,
      "step": 3882
    },
    {
      "epoch": 1.4338995568685378,
      "grad_norm": 0.23936063051223755,
      "learning_rate": 0.000104470993964774,
      "loss": 0.2067,
      "step": 3883
    },
    {
      "epoch": 1.4342688330871491,
      "grad_norm": 0.32110515236854553,
      "learning_rate": 0.0001044463603892105,
      "loss": 0.2483,
      "step": 3884
    },
    {
      "epoch": 1.4346381093057607,
      "grad_norm": 0.4062122702598572,
      "learning_rate": 0.00010442172681364701,
      "loss": 0.2665,
      "step": 3885
    },
    {
      "epoch": 1.4350073855243721,
      "grad_norm": 0.2796575725078583,
      "learning_rate": 0.00010439709323808351,
      "loss": 0.243,
      "step": 3886
    },
    {
      "epoch": 1.4353766617429837,
      "grad_norm": 0.23514513671398163,
      "learning_rate": 0.00010437245966252003,
      "loss": 0.2293,
      "step": 3887
    },
    {
      "epoch": 1.4357459379615953,
      "grad_norm": 0.2974199056625366,
      "learning_rate": 0.00010434782608695653,
      "loss": 0.2577,
      "step": 3888
    },
    {
      "epoch": 1.4361152141802067,
      "grad_norm": 0.31965237855911255,
      "learning_rate": 0.00010432319251139304,
      "loss": 0.2676,
      "step": 3889
    },
    {
      "epoch": 1.4364844903988183,
      "grad_norm": 0.31490933895111084,
      "learning_rate": 0.00010429855893582954,
      "loss": 0.2708,
      "step": 3890
    },
    {
      "epoch": 1.43685376661743,
      "grad_norm": 0.3214377164840698,
      "learning_rate": 0.00010427392536026606,
      "loss": 0.2886,
      "step": 3891
    },
    {
      "epoch": 1.4372230428360413,
      "grad_norm": 0.2550215721130371,
      "learning_rate": 0.00010424929178470256,
      "loss": 0.2547,
      "step": 3892
    },
    {
      "epoch": 1.437592319054653,
      "grad_norm": 0.2504318356513977,
      "learning_rate": 0.00010422465820913907,
      "loss": 0.2254,
      "step": 3893
    },
    {
      "epoch": 1.4379615952732645,
      "grad_norm": 0.2645464241504669,
      "learning_rate": 0.00010420002463357556,
      "loss": 0.286,
      "step": 3894
    },
    {
      "epoch": 1.4383308714918759,
      "grad_norm": 0.25396862626075745,
      "learning_rate": 0.00010417539105801209,
      "loss": 0.2016,
      "step": 3895
    },
    {
      "epoch": 1.4387001477104875,
      "grad_norm": 0.25158530473709106,
      "learning_rate": 0.00010415075748244858,
      "loss": 0.2286,
      "step": 3896
    },
    {
      "epoch": 1.4390694239290989,
      "grad_norm": 0.25421851873397827,
      "learning_rate": 0.0001041261239068851,
      "loss": 0.2338,
      "step": 3897
    },
    {
      "epoch": 1.4394387001477105,
      "grad_norm": 0.22105631232261658,
      "learning_rate": 0.00010410149033132159,
      "loss": 0.2012,
      "step": 3898
    },
    {
      "epoch": 1.439807976366322,
      "grad_norm": 0.3331243395805359,
      "learning_rate": 0.0001040768567557581,
      "loss": 0.2238,
      "step": 3899
    },
    {
      "epoch": 1.4401772525849335,
      "grad_norm": 0.3024211525917053,
      "learning_rate": 0.00010405222318019461,
      "loss": 0.2446,
      "step": 3900
    },
    {
      "epoch": 1.4401772525849335,
      "eval_loss": 0.2657105326652527,
      "eval_runtime": 5.8545,
      "eval_samples_per_second": 8.54,
      "eval_steps_per_second": 1.196,
      "step": 3900
    },
    {
      "epoch": 1.440546528803545,
      "grad_norm": 0.25392836332321167,
      "learning_rate": 0.00010402758960463112,
      "loss": 0.2286,
      "step": 3901
    },
    {
      "epoch": 1.4409158050221564,
      "grad_norm": 0.27044492959976196,
      "learning_rate": 0.00010400295602906762,
      "loss": 0.2382,
      "step": 3902
    },
    {
      "epoch": 1.441285081240768,
      "grad_norm": 0.2769128382205963,
      "learning_rate": 0.00010397832245350414,
      "loss": 0.2287,
      "step": 3903
    },
    {
      "epoch": 1.4416543574593796,
      "grad_norm": 0.2098458856344223,
      "learning_rate": 0.00010395368887794064,
      "loss": 0.1921,
      "step": 3904
    },
    {
      "epoch": 1.4420236336779912,
      "grad_norm": 0.2717629373073578,
      "learning_rate": 0.00010392905530237715,
      "loss": 0.2175,
      "step": 3905
    },
    {
      "epoch": 1.4423929098966026,
      "grad_norm": 0.26989179849624634,
      "learning_rate": 0.00010390442172681365,
      "loss": 0.2445,
      "step": 3906
    },
    {
      "epoch": 1.4427621861152142,
      "grad_norm": 0.26079505681991577,
      "learning_rate": 0.00010387978815125017,
      "loss": 0.2269,
      "step": 3907
    },
    {
      "epoch": 1.4431314623338256,
      "grad_norm": 0.32042261958122253,
      "learning_rate": 0.00010385515457568667,
      "loss": 0.2832,
      "step": 3908
    },
    {
      "epoch": 1.4435007385524372,
      "grad_norm": 0.24444133043289185,
      "learning_rate": 0.00010383052100012318,
      "loss": 0.1896,
      "step": 3909
    },
    {
      "epoch": 1.4438700147710488,
      "grad_norm": 0.27994006872177124,
      "learning_rate": 0.00010380588742455967,
      "loss": 0.2309,
      "step": 3910
    },
    {
      "epoch": 1.4442392909896602,
      "grad_norm": 0.2504062056541443,
      "learning_rate": 0.0001037812538489962,
      "loss": 0.2309,
      "step": 3911
    },
    {
      "epoch": 1.4446085672082718,
      "grad_norm": 0.2949374318122864,
      "learning_rate": 0.00010375662027343269,
      "loss": 0.2603,
      "step": 3912
    },
    {
      "epoch": 1.4449778434268832,
      "grad_norm": 0.3061377704143524,
      "learning_rate": 0.00010373198669786922,
      "loss": 0.2604,
      "step": 3913
    },
    {
      "epoch": 1.4453471196454948,
      "grad_norm": 0.25518345832824707,
      "learning_rate": 0.0001037073531223057,
      "loss": 0.1952,
      "step": 3914
    },
    {
      "epoch": 1.4457163958641064,
      "grad_norm": 0.2688634395599365,
      "learning_rate": 0.00010368271954674222,
      "loss": 0.2271,
      "step": 3915
    },
    {
      "epoch": 1.446085672082718,
      "grad_norm": 0.3324325680732727,
      "learning_rate": 0.00010365808597117872,
      "loss": 0.26,
      "step": 3916
    },
    {
      "epoch": 1.4464549483013294,
      "grad_norm": 0.3041841685771942,
      "learning_rate": 0.00010363345239561523,
      "loss": 0.2409,
      "step": 3917
    },
    {
      "epoch": 1.446824224519941,
      "grad_norm": 0.28644803166389465,
      "learning_rate": 0.00010360881882005173,
      "loss": 0.2554,
      "step": 3918
    },
    {
      "epoch": 1.4471935007385524,
      "grad_norm": 0.22980338335037231,
      "learning_rate": 0.00010358418524448825,
      "loss": 0.208,
      "step": 3919
    },
    {
      "epoch": 1.447562776957164,
      "grad_norm": 0.24705706536769867,
      "learning_rate": 0.00010355955166892475,
      "loss": 0.2072,
      "step": 3920
    },
    {
      "epoch": 1.4479320531757756,
      "grad_norm": 0.3009048402309418,
      "learning_rate": 0.00010353491809336127,
      "loss": 0.2503,
      "step": 3921
    },
    {
      "epoch": 1.448301329394387,
      "grad_norm": 0.28049546480178833,
      "learning_rate": 0.00010351028451779777,
      "loss": 0.2592,
      "step": 3922
    },
    {
      "epoch": 1.4486706056129985,
      "grad_norm": 0.26004844903945923,
      "learning_rate": 0.00010348565094223428,
      "loss": 0.2328,
      "step": 3923
    },
    {
      "epoch": 1.44903988183161,
      "grad_norm": 0.26922911405563354,
      "learning_rate": 0.00010346101736667078,
      "loss": 0.233,
      "step": 3924
    },
    {
      "epoch": 1.4494091580502215,
      "grad_norm": 0.27191996574401855,
      "learning_rate": 0.0001034363837911073,
      "loss": 0.2253,
      "step": 3925
    },
    {
      "epoch": 1.4497784342688331,
      "grad_norm": 0.2850154936313629,
      "learning_rate": 0.00010341175021554378,
      "loss": 0.2856,
      "step": 3926
    },
    {
      "epoch": 1.4501477104874447,
      "grad_norm": 0.4026567041873932,
      "learning_rate": 0.00010338711663998031,
      "loss": 0.2571,
      "step": 3927
    },
    {
      "epoch": 1.450516986706056,
      "grad_norm": 0.24033533036708832,
      "learning_rate": 0.0001033624830644168,
      "loss": 0.2057,
      "step": 3928
    },
    {
      "epoch": 1.4508862629246677,
      "grad_norm": 0.29191315174102783,
      "learning_rate": 0.00010333784948885333,
      "loss": 0.2538,
      "step": 3929
    },
    {
      "epoch": 1.451255539143279,
      "grad_norm": 0.31869855523109436,
      "learning_rate": 0.00010331321591328982,
      "loss": 0.2527,
      "step": 3930
    },
    {
      "epoch": 1.4516248153618907,
      "grad_norm": 0.2808881998062134,
      "learning_rate": 0.00010328858233772633,
      "loss": 0.2236,
      "step": 3931
    },
    {
      "epoch": 1.4519940915805023,
      "grad_norm": 0.2541932463645935,
      "learning_rate": 0.00010326394876216283,
      "loss": 0.2182,
      "step": 3932
    },
    {
      "epoch": 1.4523633677991137,
      "grad_norm": 0.2607090175151825,
      "learning_rate": 0.00010323931518659935,
      "loss": 0.2303,
      "step": 3933
    },
    {
      "epoch": 1.4527326440177253,
      "grad_norm": 0.2605319917201996,
      "learning_rate": 0.00010321468161103585,
      "loss": 0.2352,
      "step": 3934
    },
    {
      "epoch": 1.4531019202363367,
      "grad_norm": 0.27073273062705994,
      "learning_rate": 0.00010319004803547236,
      "loss": 0.221,
      "step": 3935
    },
    {
      "epoch": 1.4534711964549483,
      "grad_norm": 0.2717744708061218,
      "learning_rate": 0.00010316541445990886,
      "loss": 0.2141,
      "step": 3936
    },
    {
      "epoch": 1.4538404726735599,
      "grad_norm": 0.2980343699455261,
      "learning_rate": 0.00010314078088434538,
      "loss": 0.2532,
      "step": 3937
    },
    {
      "epoch": 1.4542097488921715,
      "grad_norm": 0.3076517581939697,
      "learning_rate": 0.00010311614730878188,
      "loss": 0.2429,
      "step": 3938
    },
    {
      "epoch": 1.4545790251107829,
      "grad_norm": 0.21933375298976898,
      "learning_rate": 0.0001030915137332184,
      "loss": 0.1826,
      "step": 3939
    },
    {
      "epoch": 1.4549483013293945,
      "grad_norm": 0.3170267939567566,
      "learning_rate": 0.0001030668801576549,
      "loss": 0.2596,
      "step": 3940
    },
    {
      "epoch": 1.4553175775480058,
      "grad_norm": 0.30827200412750244,
      "learning_rate": 0.00010304224658209141,
      "loss": 0.2941,
      "step": 3941
    },
    {
      "epoch": 1.4556868537666174,
      "grad_norm": 0.27948877215385437,
      "learning_rate": 0.0001030176130065279,
      "loss": 0.2578,
      "step": 3942
    },
    {
      "epoch": 1.456056129985229,
      "grad_norm": 0.3193192780017853,
      "learning_rate": 0.00010299297943096442,
      "loss": 0.3213,
      "step": 3943
    },
    {
      "epoch": 1.4564254062038404,
      "grad_norm": 0.2982330620288849,
      "learning_rate": 0.00010296834585540091,
      "loss": 0.213,
      "step": 3944
    },
    {
      "epoch": 1.456794682422452,
      "grad_norm": 0.25382521748542786,
      "learning_rate": 0.00010294371227983744,
      "loss": 0.191,
      "step": 3945
    },
    {
      "epoch": 1.4571639586410634,
      "grad_norm": 0.28603941202163696,
      "learning_rate": 0.00010291907870427393,
      "loss": 0.2206,
      "step": 3946
    },
    {
      "epoch": 1.457533234859675,
      "grad_norm": 0.2785376310348511,
      "learning_rate": 0.00010289444512871044,
      "loss": 0.2476,
      "step": 3947
    },
    {
      "epoch": 1.4579025110782866,
      "grad_norm": 0.3084624111652374,
      "learning_rate": 0.00010286981155314694,
      "loss": 0.2835,
      "step": 3948
    },
    {
      "epoch": 1.4582717872968982,
      "grad_norm": 0.2752220630645752,
      "learning_rate": 0.00010284517797758344,
      "loss": 0.2326,
      "step": 3949
    },
    {
      "epoch": 1.4586410635155096,
      "grad_norm": 0.22812512516975403,
      "learning_rate": 0.00010282054440201996,
      "loss": 0.2256,
      "step": 3950
    },
    {
      "epoch": 1.4586410635155096,
      "eval_loss": 0.26708507537841797,
      "eval_runtime": 5.8575,
      "eval_samples_per_second": 8.536,
      "eval_steps_per_second": 1.195,
      "step": 3950
    },
    {
      "epoch": 1.4590103397341212,
      "grad_norm": 0.2775406241416931,
      "learning_rate": 0.00010279591082645646,
      "loss": 0.2589,
      "step": 3951
    },
    {
      "epoch": 1.4593796159527326,
      "grad_norm": 0.23759698867797852,
      "learning_rate": 0.00010277127725089298,
      "loss": 0.1956,
      "step": 3952
    },
    {
      "epoch": 1.4597488921713442,
      "grad_norm": 0.243507981300354,
      "learning_rate": 0.00010274664367532946,
      "loss": 0.2154,
      "step": 3953
    },
    {
      "epoch": 1.4601181683899558,
      "grad_norm": 0.24589616060256958,
      "learning_rate": 0.00010272201009976599,
      "loss": 0.2494,
      "step": 3954
    },
    {
      "epoch": 1.4604874446085672,
      "grad_norm": 0.25779590010643005,
      "learning_rate": 0.00010269737652420248,
      "loss": 0.2359,
      "step": 3955
    },
    {
      "epoch": 1.4608567208271788,
      "grad_norm": 0.23699389398097992,
      "learning_rate": 0.000102672742948639,
      "loss": 0.2466,
      "step": 3956
    },
    {
      "epoch": 1.4612259970457901,
      "grad_norm": 0.22925809025764465,
      "learning_rate": 0.0001026481093730755,
      "loss": 0.2097,
      "step": 3957
    },
    {
      "epoch": 1.4615952732644018,
      "grad_norm": 0.2855239808559418,
      "learning_rate": 0.00010262347579751201,
      "loss": 0.2594,
      "step": 3958
    },
    {
      "epoch": 1.4619645494830134,
      "grad_norm": 0.2658534049987793,
      "learning_rate": 0.00010259884222194851,
      "loss": 0.2354,
      "step": 3959
    },
    {
      "epoch": 1.4623338257016247,
      "grad_norm": 0.31895971298217773,
      "learning_rate": 0.00010257420864638502,
      "loss": 0.2417,
      "step": 3960
    },
    {
      "epoch": 1.4627031019202363,
      "grad_norm": 0.24943141639232635,
      "learning_rate": 0.00010254957507082153,
      "loss": 0.2065,
      "step": 3961
    },
    {
      "epoch": 1.463072378138848,
      "grad_norm": 0.24429693818092346,
      "learning_rate": 0.00010252494149525804,
      "loss": 0.2326,
      "step": 3962
    },
    {
      "epoch": 1.4634416543574593,
      "grad_norm": 0.386321485042572,
      "learning_rate": 0.00010250030791969454,
      "loss": 0.2663,
      "step": 3963
    },
    {
      "epoch": 1.463810930576071,
      "grad_norm": 0.29140713810920715,
      "learning_rate": 0.00010247567434413106,
      "loss": 0.2227,
      "step": 3964
    },
    {
      "epoch": 1.4641802067946825,
      "grad_norm": 0.2905665934085846,
      "learning_rate": 0.00010245104076856756,
      "loss": 0.2463,
      "step": 3965
    },
    {
      "epoch": 1.464549483013294,
      "grad_norm": 0.3086444139480591,
      "learning_rate": 0.00010242640719300407,
      "loss": 0.2811,
      "step": 3966
    },
    {
      "epoch": 1.4649187592319055,
      "grad_norm": 0.3844276964664459,
      "learning_rate": 0.00010240177361744057,
      "loss": 0.2513,
      "step": 3967
    },
    {
      "epoch": 1.465288035450517,
      "grad_norm": 0.2641187310218811,
      "learning_rate": 0.00010237714004187709,
      "loss": 0.2173,
      "step": 3968
    },
    {
      "epoch": 1.4656573116691285,
      "grad_norm": 0.3104974329471588,
      "learning_rate": 0.00010235250646631357,
      "loss": 0.2709,
      "step": 3969
    },
    {
      "epoch": 1.46602658788774,
      "grad_norm": 0.35863199830055237,
      "learning_rate": 0.0001023278728907501,
      "loss": 0.2933,
      "step": 3970
    },
    {
      "epoch": 1.4663958641063515,
      "grad_norm": 0.26285624504089355,
      "learning_rate": 0.00010230323931518659,
      "loss": 0.2349,
      "step": 3971
    },
    {
      "epoch": 1.466765140324963,
      "grad_norm": 0.283856600522995,
      "learning_rate": 0.00010227860573962312,
      "loss": 0.2417,
      "step": 3972
    },
    {
      "epoch": 1.4671344165435745,
      "grad_norm": 0.24891315400600433,
      "learning_rate": 0.0001022539721640596,
      "loss": 0.2046,
      "step": 3973
    },
    {
      "epoch": 1.467503692762186,
      "grad_norm": 0.30018529295921326,
      "learning_rate": 0.00010222933858849612,
      "loss": 0.2569,
      "step": 3974
    },
    {
      "epoch": 1.4678729689807977,
      "grad_norm": 0.30528995394706726,
      "learning_rate": 0.00010220470501293262,
      "loss": 0.2863,
      "step": 3975
    },
    {
      "epoch": 1.4682422451994093,
      "grad_norm": 0.20827481150627136,
      "learning_rate": 0.00010218007143736914,
      "loss": 0.214,
      "step": 3976
    },
    {
      "epoch": 1.4686115214180206,
      "grad_norm": 0.2594269812107086,
      "learning_rate": 0.00010215543786180564,
      "loss": 0.224,
      "step": 3977
    },
    {
      "epoch": 1.4689807976366323,
      "grad_norm": 0.3854300379753113,
      "learning_rate": 0.00010213080428624215,
      "loss": 0.2729,
      "step": 3978
    },
    {
      "epoch": 1.4693500738552436,
      "grad_norm": 0.2656078636646271,
      "learning_rate": 0.00010210617071067865,
      "loss": 0.2111,
      "step": 3979
    },
    {
      "epoch": 1.4697193500738552,
      "grad_norm": 0.24714063107967377,
      "learning_rate": 0.00010208153713511517,
      "loss": 0.2162,
      "step": 3980
    },
    {
      "epoch": 1.4700886262924668,
      "grad_norm": 0.32045742869377136,
      "learning_rate": 0.00010205690355955167,
      "loss": 0.2485,
      "step": 3981
    },
    {
      "epoch": 1.4704579025110782,
      "grad_norm": 0.21791408956050873,
      "learning_rate": 0.00010203226998398818,
      "loss": 0.2354,
      "step": 3982
    },
    {
      "epoch": 1.4708271787296898,
      "grad_norm": 0.2862100601196289,
      "learning_rate": 0.00010200763640842467,
      "loss": 0.2308,
      "step": 3983
    },
    {
      "epoch": 1.4711964549483012,
      "grad_norm": 0.30597779154777527,
      "learning_rate": 0.0001019830028328612,
      "loss": 0.2212,
      "step": 3984
    },
    {
      "epoch": 1.4715657311669128,
      "grad_norm": 0.2577318847179413,
      "learning_rate": 0.00010195836925729769,
      "loss": 0.2214,
      "step": 3985
    },
    {
      "epoch": 1.4719350073855244,
      "grad_norm": 0.21729454398155212,
      "learning_rate": 0.00010193373568173422,
      "loss": 0.1774,
      "step": 3986
    },
    {
      "epoch": 1.472304283604136,
      "grad_norm": 0.39620912075042725,
      "learning_rate": 0.0001019091021061707,
      "loss": 0.2499,
      "step": 3987
    },
    {
      "epoch": 1.4726735598227474,
      "grad_norm": 0.30386099219322205,
      "learning_rate": 0.00010188446853060723,
      "loss": 0.2267,
      "step": 3988
    },
    {
      "epoch": 1.473042836041359,
      "grad_norm": 0.30357635021209717,
      "learning_rate": 0.00010185983495504372,
      "loss": 0.2692,
      "step": 3989
    },
    {
      "epoch": 1.4734121122599704,
      "grad_norm": 0.3040999472141266,
      "learning_rate": 0.00010183520137948023,
      "loss": 0.2825,
      "step": 3990
    },
    {
      "epoch": 1.473781388478582,
      "grad_norm": 0.27398502826690674,
      "learning_rate": 0.00010181056780391673,
      "loss": 0.2344,
      "step": 3991
    },
    {
      "epoch": 1.4741506646971936,
      "grad_norm": 0.297529935836792,
      "learning_rate": 0.00010178593422835325,
      "loss": 0.2824,
      "step": 3992
    },
    {
      "epoch": 1.474519940915805,
      "grad_norm": 0.26736244559288025,
      "learning_rate": 0.00010176130065278975,
      "loss": 0.2225,
      "step": 3993
    },
    {
      "epoch": 1.4748892171344166,
      "grad_norm": 0.25624769926071167,
      "learning_rate": 0.00010173666707722626,
      "loss": 0.2416,
      "step": 3994
    },
    {
      "epoch": 1.475258493353028,
      "grad_norm": 0.34669265151023865,
      "learning_rate": 0.00010171203350166277,
      "loss": 0.2522,
      "step": 3995
    },
    {
      "epoch": 1.4756277695716395,
      "grad_norm": 0.30900081992149353,
      "learning_rate": 0.00010168739992609928,
      "loss": 0.2328,
      "step": 3996
    },
    {
      "epoch": 1.4759970457902511,
      "grad_norm": 0.28143182396888733,
      "learning_rate": 0.00010166276635053578,
      "loss": 0.2544,
      "step": 3997
    },
    {
      "epoch": 1.4763663220088628,
      "grad_norm": 0.2547646462917328,
      "learning_rate": 0.0001016381327749723,
      "loss": 0.2429,
      "step": 3998
    },
    {
      "epoch": 1.4767355982274741,
      "grad_norm": 0.25624385476112366,
      "learning_rate": 0.00010161349919940878,
      "loss": 0.192,
      "step": 3999
    },
    {
      "epoch": 1.4771048744460857,
      "grad_norm": 0.35263240337371826,
      "learning_rate": 0.00010158886562384531,
      "loss": 0.2793,
      "step": 4000
    },
    {
      "epoch": 1.4771048744460857,
      "eval_loss": 0.2678302824497223,
      "eval_runtime": 5.8712,
      "eval_samples_per_second": 8.516,
      "eval_steps_per_second": 1.192,
      "step": 4000
    },
    {
      "epoch": 1.4774741506646971,
      "grad_norm": 0.26784220337867737,
      "learning_rate": 0.0001015642320482818,
      "loss": 0.2109,
      "step": 4001
    },
    {
      "epoch": 1.4778434268833087,
      "grad_norm": 0.305281400680542,
      "learning_rate": 0.00010153959847271833,
      "loss": 0.2599,
      "step": 4002
    },
    {
      "epoch": 1.4782127031019203,
      "grad_norm": 0.2713955342769623,
      "learning_rate": 0.00010151496489715482,
      "loss": 0.2653,
      "step": 4003
    },
    {
      "epoch": 1.4785819793205317,
      "grad_norm": 0.2568889856338501,
      "learning_rate": 0.00010149033132159133,
      "loss": 0.2374,
      "step": 4004
    },
    {
      "epoch": 1.4789512555391433,
      "grad_norm": 0.27562272548675537,
      "learning_rate": 0.00010146569774602783,
      "loss": 0.2313,
      "step": 4005
    },
    {
      "epoch": 1.4793205317577547,
      "grad_norm": 0.2352875918149948,
      "learning_rate": 0.00010144106417046435,
      "loss": 0.1985,
      "step": 4006
    },
    {
      "epoch": 1.4796898079763663,
      "grad_norm": 0.31400978565216064,
      "learning_rate": 0.00010141643059490085,
      "loss": 0.2322,
      "step": 4007
    },
    {
      "epoch": 1.480059084194978,
      "grad_norm": 0.2479861080646515,
      "learning_rate": 0.00010139179701933736,
      "loss": 0.2156,
      "step": 4008
    },
    {
      "epoch": 1.4804283604135895,
      "grad_norm": 0.255172461271286,
      "learning_rate": 0.00010136716344377386,
      "loss": 0.2241,
      "step": 4009
    },
    {
      "epoch": 1.4807976366322009,
      "grad_norm": 0.2996932864189148,
      "learning_rate": 0.00010134252986821038,
      "loss": 0.2656,
      "step": 4010
    },
    {
      "epoch": 1.4811669128508125,
      "grad_norm": 0.24185395240783691,
      "learning_rate": 0.00010131789629264688,
      "loss": 0.1916,
      "step": 4011
    },
    {
      "epoch": 1.4815361890694239,
      "grad_norm": 0.29753515124320984,
      "learning_rate": 0.00010129326271708339,
      "loss": 0.2564,
      "step": 4012
    },
    {
      "epoch": 1.4819054652880355,
      "grad_norm": 0.2661593556404114,
      "learning_rate": 0.0001012686291415199,
      "loss": 0.2245,
      "step": 4013
    },
    {
      "epoch": 1.482274741506647,
      "grad_norm": 0.27129074931144714,
      "learning_rate": 0.00010124399556595641,
      "loss": 0.2234,
      "step": 4014
    },
    {
      "epoch": 1.4826440177252584,
      "grad_norm": 0.2453327476978302,
      "learning_rate": 0.0001012193619903929,
      "loss": 0.2384,
      "step": 4015
    },
    {
      "epoch": 1.48301329394387,
      "grad_norm": 0.28773802518844604,
      "learning_rate": 0.00010119472841482942,
      "loss": 0.2505,
      "step": 4016
    },
    {
      "epoch": 1.4833825701624814,
      "grad_norm": 0.2755826413631439,
      "learning_rate": 0.00010117009483926591,
      "loss": 0.2131,
      "step": 4017
    },
    {
      "epoch": 1.483751846381093,
      "grad_norm": 0.2814352512359619,
      "learning_rate": 0.00010114546126370244,
      "loss": 0.217,
      "step": 4018
    },
    {
      "epoch": 1.4841211225997046,
      "grad_norm": 0.28828540444374084,
      "learning_rate": 0.00010112082768813893,
      "loss": 0.263,
      "step": 4019
    },
    {
      "epoch": 1.4844903988183162,
      "grad_norm": 0.2544025182723999,
      "learning_rate": 0.00010109619411257544,
      "loss": 0.2509,
      "step": 4020
    },
    {
      "epoch": 1.4848596750369276,
      "grad_norm": 0.2896730899810791,
      "learning_rate": 0.00010107156053701194,
      "loss": 0.2519,
      "step": 4021
    },
    {
      "epoch": 1.4852289512555392,
      "grad_norm": 0.2682430148124695,
      "learning_rate": 0.00010104692696144846,
      "loss": 0.2538,
      "step": 4022
    },
    {
      "epoch": 1.4855982274741506,
      "grad_norm": 0.24106591939926147,
      "learning_rate": 0.00010102229338588496,
      "loss": 0.2411,
      "step": 4023
    },
    {
      "epoch": 1.4859675036927622,
      "grad_norm": 0.28132450580596924,
      "learning_rate": 0.00010099765981032147,
      "loss": 0.2722,
      "step": 4024
    },
    {
      "epoch": 1.4863367799113738,
      "grad_norm": 0.22729288041591644,
      "learning_rate": 0.00010097302623475797,
      "loss": 0.1995,
      "step": 4025
    },
    {
      "epoch": 1.4867060561299852,
      "grad_norm": 0.28534844517707825,
      "learning_rate": 0.00010094839265919449,
      "loss": 0.2272,
      "step": 4026
    },
    {
      "epoch": 1.4870753323485968,
      "grad_norm": 0.2466653436422348,
      "learning_rate": 0.00010092375908363099,
      "loss": 0.1938,
      "step": 4027
    },
    {
      "epoch": 1.4874446085672082,
      "grad_norm": 0.3041146695613861,
      "learning_rate": 0.0001008991255080675,
      "loss": 0.2308,
      "step": 4028
    },
    {
      "epoch": 1.4878138847858198,
      "grad_norm": 0.27459827065467834,
      "learning_rate": 0.000100874491932504,
      "loss": 0.2349,
      "step": 4029
    },
    {
      "epoch": 1.4881831610044314,
      "grad_norm": 0.2825285494327545,
      "learning_rate": 0.00010084985835694052,
      "loss": 0.2419,
      "step": 4030
    },
    {
      "epoch": 1.4885524372230428,
      "grad_norm": 0.27188214659690857,
      "learning_rate": 0.00010082522478137701,
      "loss": 0.2105,
      "step": 4031
    },
    {
      "epoch": 1.4889217134416544,
      "grad_norm": 0.2776201665401459,
      "learning_rate": 0.00010080059120581354,
      "loss": 0.2144,
      "step": 4032
    },
    {
      "epoch": 1.4892909896602657,
      "grad_norm": 0.5040718913078308,
      "learning_rate": 0.00010077595763025002,
      "loss": 0.2785,
      "step": 4033
    },
    {
      "epoch": 1.4896602658788773,
      "grad_norm": 0.3240455687046051,
      "learning_rate": 0.00010075132405468655,
      "loss": 0.29,
      "step": 4034
    },
    {
      "epoch": 1.490029542097489,
      "grad_norm": 0.27039119601249695,
      "learning_rate": 0.00010072669047912304,
      "loss": 0.247,
      "step": 4035
    },
    {
      "epoch": 1.4903988183161005,
      "grad_norm": 0.2560168206691742,
      "learning_rate": 0.00010070205690355955,
      "loss": 0.2038,
      "step": 4036
    },
    {
      "epoch": 1.490768094534712,
      "grad_norm": 0.2725893259048462,
      "learning_rate": 0.00010067742332799606,
      "loss": 0.2249,
      "step": 4037
    },
    {
      "epoch": 1.4911373707533235,
      "grad_norm": 0.24265554547309875,
      "learning_rate": 0.00010065278975243257,
      "loss": 0.2332,
      "step": 4038
    },
    {
      "epoch": 1.491506646971935,
      "grad_norm": 0.2514985203742981,
      "learning_rate": 0.00010062815617686907,
      "loss": 0.2242,
      "step": 4039
    },
    {
      "epoch": 1.4918759231905465,
      "grad_norm": 0.359241783618927,
      "learning_rate": 0.00010060352260130559,
      "loss": 0.2744,
      "step": 4040
    },
    {
      "epoch": 1.4922451994091581,
      "grad_norm": 0.25917497277259827,
      "learning_rate": 0.00010057888902574209,
      "loss": 0.1946,
      "step": 4041
    },
    {
      "epoch": 1.4926144756277695,
      "grad_norm": 0.28097108006477356,
      "learning_rate": 0.0001005542554501786,
      "loss": 0.2319,
      "step": 4042
    },
    {
      "epoch": 1.492983751846381,
      "grad_norm": 0.29191944003105164,
      "learning_rate": 0.0001005296218746151,
      "loss": 0.2646,
      "step": 4043
    },
    {
      "epoch": 1.4933530280649925,
      "grad_norm": 0.41488784551620483,
      "learning_rate": 0.00010050498829905162,
      "loss": 0.2781,
      "step": 4044
    },
    {
      "epoch": 1.493722304283604,
      "grad_norm": 0.27251023054122925,
      "learning_rate": 0.00010048035472348812,
      "loss": 0.223,
      "step": 4045
    },
    {
      "epoch": 1.4940915805022157,
      "grad_norm": 0.22081872820854187,
      "learning_rate": 0.00010045572114792463,
      "loss": 0.2026,
      "step": 4046
    },
    {
      "epoch": 1.4944608567208273,
      "grad_norm": 0.2805134057998657,
      "learning_rate": 0.00010043108757236112,
      "loss": 0.255,
      "step": 4047
    },
    {
      "epoch": 1.4948301329394387,
      "grad_norm": 0.2585667371749878,
      "learning_rate": 0.00010040645399679765,
      "loss": 0.227,
      "step": 4048
    },
    {
      "epoch": 1.4951994091580503,
      "grad_norm": 0.32681065797805786,
      "learning_rate": 0.00010038182042123414,
      "loss": 0.2357,
      "step": 4049
    },
    {
      "epoch": 1.4955686853766617,
      "grad_norm": 0.2742816209793091,
      "learning_rate": 0.00010035718684567066,
      "loss": 0.2469,
      "step": 4050
    },
    {
      "epoch": 1.4955686853766617,
      "eval_loss": 0.26486024260520935,
      "eval_runtime": 5.8567,
      "eval_samples_per_second": 8.537,
      "eval_steps_per_second": 1.195,
      "step": 4050
    },
    {
      "epoch": 1.4959379615952733,
      "grad_norm": 0.25103434920310974,
      "learning_rate": 0.00010033255327010715,
      "loss": 0.2167,
      "step": 4051
    },
    {
      "epoch": 1.4963072378138849,
      "grad_norm": 0.25343605875968933,
      "learning_rate": 0.00010030791969454367,
      "loss": 0.2016,
      "step": 4052
    },
    {
      "epoch": 1.4966765140324962,
      "grad_norm": 0.3064647614955902,
      "learning_rate": 0.00010028328611898017,
      "loss": 0.2382,
      "step": 4053
    },
    {
      "epoch": 1.4970457902511078,
      "grad_norm": 0.3272014558315277,
      "learning_rate": 0.00010025865254341668,
      "loss": 0.2299,
      "step": 4054
    },
    {
      "epoch": 1.4974150664697192,
      "grad_norm": 0.2515038847923279,
      "learning_rate": 0.00010023401896785318,
      "loss": 0.2166,
      "step": 4055
    },
    {
      "epoch": 1.4977843426883308,
      "grad_norm": 0.25827375054359436,
      "learning_rate": 0.0001002093853922897,
      "loss": 0.2273,
      "step": 4056
    },
    {
      "epoch": 1.4981536189069424,
      "grad_norm": 0.2369767129421234,
      "learning_rate": 0.0001001847518167262,
      "loss": 0.1964,
      "step": 4057
    },
    {
      "epoch": 1.498522895125554,
      "grad_norm": 0.25905394554138184,
      "learning_rate": 0.00010016011824116271,
      "loss": 0.2243,
      "step": 4058
    },
    {
      "epoch": 1.4988921713441654,
      "grad_norm": 0.3252527415752411,
      "learning_rate": 0.00010013548466559921,
      "loss": 0.2476,
      "step": 4059
    },
    {
      "epoch": 1.499261447562777,
      "grad_norm": 0.2727453410625458,
      "learning_rate": 0.00010011085109003573,
      "loss": 0.2642,
      "step": 4060
    },
    {
      "epoch": 1.4996307237813884,
      "grad_norm": 0.24760130047798157,
      "learning_rate": 0.00010008621751447223,
      "loss": 0.1964,
      "step": 4061
    },
    {
      "epoch": 1.5,
      "grad_norm": 0.29868680238723755,
      "learning_rate": 0.00010006158393890875,
      "loss": 0.2332,
      "step": 4062
    },
    {
      "epoch": 1.5003692762186116,
      "grad_norm": 0.308330774307251,
      "learning_rate": 0.00010003695036334523,
      "loss": 0.2338,
      "step": 4063
    },
    {
      "epoch": 1.5007385524372232,
      "grad_norm": 0.2614595890045166,
      "learning_rate": 0.00010001231678778176,
      "loss": 0.2229,
      "step": 4064
    },
    {
      "epoch": 1.5011078286558346,
      "grad_norm": 0.23017829656600952,
      "learning_rate": 9.998768321221825e-05,
      "loss": 0.1753,
      "step": 4065
    },
    {
      "epoch": 1.501477104874446,
      "grad_norm": 0.2502877414226532,
      "learning_rate": 9.996304963665476e-05,
      "loss": 0.2181,
      "step": 4066
    },
    {
      "epoch": 1.5018463810930576,
      "grad_norm": 0.29967954754829407,
      "learning_rate": 9.993841606109126e-05,
      "loss": 0.2221,
      "step": 4067
    },
    {
      "epoch": 1.5022156573116692,
      "grad_norm": 0.2727149426937103,
      "learning_rate": 9.991378248552778e-05,
      "loss": 0.2207,
      "step": 4068
    },
    {
      "epoch": 1.5025849335302808,
      "grad_norm": 0.2796933352947235,
      "learning_rate": 9.988914890996428e-05,
      "loss": 0.2224,
      "step": 4069
    },
    {
      "epoch": 1.5029542097488922,
      "grad_norm": 0.24057498574256897,
      "learning_rate": 9.986451533440078e-05,
      "loss": 0.1949,
      "step": 4070
    },
    {
      "epoch": 1.5033234859675035,
      "grad_norm": 0.28947803378105164,
      "learning_rate": 9.98398817588373e-05,
      "loss": 0.2285,
      "step": 4071
    },
    {
      "epoch": 1.5036927621861151,
      "grad_norm": 0.27775415778160095,
      "learning_rate": 9.98152481832738e-05,
      "loss": 0.2277,
      "step": 4072
    },
    {
      "epoch": 1.5040620384047267,
      "grad_norm": 0.22638939321041107,
      "learning_rate": 9.979061460771031e-05,
      "loss": 0.1869,
      "step": 4073
    },
    {
      "epoch": 1.5044313146233383,
      "grad_norm": 0.28084293007850647,
      "learning_rate": 9.976598103214681e-05,
      "loss": 0.2281,
      "step": 4074
    },
    {
      "epoch": 1.5048005908419497,
      "grad_norm": 0.337291419506073,
      "learning_rate": 9.974134745658333e-05,
      "loss": 0.2373,
      "step": 4075
    },
    {
      "epoch": 1.5051698670605613,
      "grad_norm": 0.26597699522972107,
      "learning_rate": 9.971671388101983e-05,
      "loss": 0.2415,
      "step": 4076
    },
    {
      "epoch": 1.5055391432791727,
      "grad_norm": 0.3119012713432312,
      "learning_rate": 9.969208030545634e-05,
      "loss": 0.2505,
      "step": 4077
    },
    {
      "epoch": 1.5059084194977843,
      "grad_norm": 0.2967747449874878,
      "learning_rate": 9.966744672989284e-05,
      "loss": 0.2172,
      "step": 4078
    },
    {
      "epoch": 1.506277695716396,
      "grad_norm": 0.2876927852630615,
      "learning_rate": 9.964281315432935e-05,
      "loss": 0.225,
      "step": 4079
    },
    {
      "epoch": 1.5066469719350075,
      "grad_norm": 0.24519896507263184,
      "learning_rate": 9.961817957876586e-05,
      "loss": 0.2047,
      "step": 4080
    },
    {
      "epoch": 1.507016248153619,
      "grad_norm": 0.25056275725364685,
      "learning_rate": 9.959354600320236e-05,
      "loss": 0.2122,
      "step": 4081
    },
    {
      "epoch": 1.5073855243722303,
      "grad_norm": 0.26125919818878174,
      "learning_rate": 9.956891242763888e-05,
      "loss": 0.2156,
      "step": 4082
    },
    {
      "epoch": 1.5077548005908419,
      "grad_norm": 0.2800387740135193,
      "learning_rate": 9.954427885207538e-05,
      "loss": 0.221,
      "step": 4083
    },
    {
      "epoch": 1.5081240768094535,
      "grad_norm": 0.26222723722457886,
      "learning_rate": 9.951964527651189e-05,
      "loss": 0.2235,
      "step": 4084
    },
    {
      "epoch": 1.508493353028065,
      "grad_norm": 0.3005081117153168,
      "learning_rate": 9.949501170094839e-05,
      "loss": 0.2423,
      "step": 4085
    },
    {
      "epoch": 1.5088626292466765,
      "grad_norm": 0.30846738815307617,
      "learning_rate": 9.94703781253849e-05,
      "loss": 0.2833,
      "step": 4086
    },
    {
      "epoch": 1.509231905465288,
      "grad_norm": 0.24236111342906952,
      "learning_rate": 9.944574454982141e-05,
      "loss": 0.2138,
      "step": 4087
    },
    {
      "epoch": 1.5096011816838995,
      "grad_norm": 0.2984859347343445,
      "learning_rate": 9.942111097425791e-05,
      "loss": 0.2585,
      "step": 4088
    },
    {
      "epoch": 1.509970457902511,
      "grad_norm": 0.32192322611808777,
      "learning_rate": 9.939647739869442e-05,
      "loss": 0.2461,
      "step": 4089
    },
    {
      "epoch": 1.5103397341211227,
      "grad_norm": 0.3119538426399231,
      "learning_rate": 9.937184382313092e-05,
      "loss": 0.2687,
      "step": 4090
    },
    {
      "epoch": 1.5107090103397343,
      "grad_norm": 0.2933902144432068,
      "learning_rate": 9.934721024756744e-05,
      "loss": 0.2594,
      "step": 4091
    },
    {
      "epoch": 1.5110782865583456,
      "grad_norm": 0.2725570499897003,
      "learning_rate": 9.932257667200394e-05,
      "loss": 0.2278,
      "step": 4092
    },
    {
      "epoch": 1.511447562776957,
      "grad_norm": 0.23285800218582153,
      "learning_rate": 9.929794309644046e-05,
      "loss": 0.1939,
      "step": 4093
    },
    {
      "epoch": 1.5118168389955686,
      "grad_norm": 0.2728244960308075,
      "learning_rate": 9.927330952087696e-05,
      "loss": 0.2354,
      "step": 4094
    },
    {
      "epoch": 1.5121861152141802,
      "grad_norm": 0.2810523509979248,
      "learning_rate": 9.924867594531346e-05,
      "loss": 0.2171,
      "step": 4095
    },
    {
      "epoch": 1.5125553914327918,
      "grad_norm": 0.31245458126068115,
      "learning_rate": 9.922404236974997e-05,
      "loss": 0.2041,
      "step": 4096
    },
    {
      "epoch": 1.5129246676514032,
      "grad_norm": 0.27133268117904663,
      "learning_rate": 9.919940879418647e-05,
      "loss": 0.2438,
      "step": 4097
    },
    {
      "epoch": 1.5132939438700148,
      "grad_norm": 0.29600241780281067,
      "learning_rate": 9.917477521862299e-05,
      "loss": 0.2176,
      "step": 4098
    },
    {
      "epoch": 1.5136632200886262,
      "grad_norm": 0.26659178733825684,
      "learning_rate": 9.915014164305949e-05,
      "loss": 0.2302,
      "step": 4099
    },
    {
      "epoch": 1.5140324963072378,
      "grad_norm": 0.3164130449295044,
      "learning_rate": 9.9125508067496e-05,
      "loss": 0.2331,
      "step": 4100
    },
    {
      "epoch": 1.5140324963072378,
      "eval_loss": 0.271483451128006,
      "eval_runtime": 5.8555,
      "eval_samples_per_second": 8.539,
      "eval_steps_per_second": 1.195,
      "step": 4100
    },
    {
      "epoch": 1.5144017725258494,
      "grad_norm": 0.34098005294799805,
      "learning_rate": 9.91008744919325e-05,
      "loss": 0.2351,
      "step": 4101
    },
    {
      "epoch": 1.514771048744461,
      "grad_norm": 0.2760215401649475,
      "learning_rate": 9.9076240916369e-05,
      "loss": 0.2209,
      "step": 4102
    },
    {
      "epoch": 1.5151403249630724,
      "grad_norm": 0.2948487102985382,
      "learning_rate": 9.905160734080552e-05,
      "loss": 0.2412,
      "step": 4103
    },
    {
      "epoch": 1.5155096011816838,
      "grad_norm": 0.2646871507167816,
      "learning_rate": 9.902697376524202e-05,
      "loss": 0.2237,
      "step": 4104
    },
    {
      "epoch": 1.5158788774002954,
      "grad_norm": 0.28485599160194397,
      "learning_rate": 9.900234018967854e-05,
      "loss": 0.2455,
      "step": 4105
    },
    {
      "epoch": 1.516248153618907,
      "grad_norm": 0.33545544743537903,
      "learning_rate": 9.897770661411504e-05,
      "loss": 0.2699,
      "step": 4106
    },
    {
      "epoch": 1.5166174298375186,
      "grad_norm": 0.2689124643802643,
      "learning_rate": 9.895307303855155e-05,
      "loss": 0.2091,
      "step": 4107
    },
    {
      "epoch": 1.51698670605613,
      "grad_norm": 0.2539463937282562,
      "learning_rate": 9.892843946298805e-05,
      "loss": 0.2525,
      "step": 4108
    },
    {
      "epoch": 1.5173559822747416,
      "grad_norm": 0.23484019935131073,
      "learning_rate": 9.890380588742457e-05,
      "loss": 0.2118,
      "step": 4109
    },
    {
      "epoch": 1.517725258493353,
      "grad_norm": 0.3526381552219391,
      "learning_rate": 9.887917231186107e-05,
      "loss": 0.2821,
      "step": 4110
    },
    {
      "epoch": 1.5180945347119645,
      "grad_norm": 0.2914596498012543,
      "learning_rate": 9.885453873629757e-05,
      "loss": 0.2647,
      "step": 4111
    },
    {
      "epoch": 1.5184638109305761,
      "grad_norm": 0.3403332531452179,
      "learning_rate": 9.882990516073408e-05,
      "loss": 0.2353,
      "step": 4112
    },
    {
      "epoch": 1.5188330871491877,
      "grad_norm": 0.26303350925445557,
      "learning_rate": 9.880527158517059e-05,
      "loss": 0.2439,
      "step": 4113
    },
    {
      "epoch": 1.5192023633677991,
      "grad_norm": 0.330528199672699,
      "learning_rate": 9.87806380096071e-05,
      "loss": 0.2661,
      "step": 4114
    },
    {
      "epoch": 1.5195716395864105,
      "grad_norm": 0.2686528265476227,
      "learning_rate": 9.87560044340436e-05,
      "loss": 0.2218,
      "step": 4115
    },
    {
      "epoch": 1.519940915805022,
      "grad_norm": 0.24084657430648804,
      "learning_rate": 9.873137085848012e-05,
      "loss": 0.2061,
      "step": 4116
    },
    {
      "epoch": 1.5203101920236337,
      "grad_norm": 0.23300257325172424,
      "learning_rate": 9.870673728291662e-05,
      "loss": 0.1991,
      "step": 4117
    },
    {
      "epoch": 1.5206794682422453,
      "grad_norm": 0.23994757235050201,
      "learning_rate": 9.868210370735312e-05,
      "loss": 0.2217,
      "step": 4118
    },
    {
      "epoch": 1.5210487444608567,
      "grad_norm": 0.2562894821166992,
      "learning_rate": 9.865747013178963e-05,
      "loss": 0.2143,
      "step": 4119
    },
    {
      "epoch": 1.5214180206794683,
      "grad_norm": 0.26783227920532227,
      "learning_rate": 9.863283655622613e-05,
      "loss": 0.2375,
      "step": 4120
    },
    {
      "epoch": 1.5217872968980797,
      "grad_norm": 0.2599738538265228,
      "learning_rate": 9.860820298066265e-05,
      "loss": 0.2536,
      "step": 4121
    },
    {
      "epoch": 1.5221565731166913,
      "grad_norm": 0.24114684760570526,
      "learning_rate": 9.858356940509915e-05,
      "loss": 0.2085,
      "step": 4122
    },
    {
      "epoch": 1.5225258493353029,
      "grad_norm": 0.300831139087677,
      "learning_rate": 9.855893582953566e-05,
      "loss": 0.2434,
      "step": 4123
    },
    {
      "epoch": 1.5228951255539145,
      "grad_norm": 0.24409927427768707,
      "learning_rate": 9.853430225397217e-05,
      "loss": 0.2232,
      "step": 4124
    },
    {
      "epoch": 1.5232644017725259,
      "grad_norm": 0.2840551733970642,
      "learning_rate": 9.850966867840868e-05,
      "loss": 0.2477,
      "step": 4125
    },
    {
      "epoch": 1.5236336779911372,
      "grad_norm": 0.24531321227550507,
      "learning_rate": 9.848503510284518e-05,
      "loss": 0.1941,
      "step": 4126
    },
    {
      "epoch": 1.5240029542097489,
      "grad_norm": 0.3306686580181122,
      "learning_rate": 9.846040152728168e-05,
      "loss": 0.2586,
      "step": 4127
    },
    {
      "epoch": 1.5243722304283605,
      "grad_norm": 0.3042660355567932,
      "learning_rate": 9.84357679517182e-05,
      "loss": 0.2395,
      "step": 4128
    },
    {
      "epoch": 1.524741506646972,
      "grad_norm": 0.28639450669288635,
      "learning_rate": 9.84111343761547e-05,
      "loss": 0.2309,
      "step": 4129
    },
    {
      "epoch": 1.5251107828655834,
      "grad_norm": 0.27038899064064026,
      "learning_rate": 9.838650080059121e-05,
      "loss": 0.2459,
      "step": 4130
    },
    {
      "epoch": 1.5254800590841948,
      "grad_norm": 0.3015116751194,
      "learning_rate": 9.836186722502771e-05,
      "loss": 0.248,
      "step": 4131
    },
    {
      "epoch": 1.5258493353028064,
      "grad_norm": 0.27150559425354004,
      "learning_rate": 9.833723364946423e-05,
      "loss": 0.273,
      "step": 4132
    },
    {
      "epoch": 1.526218611521418,
      "grad_norm": 0.30560436844825745,
      "learning_rate": 9.831260007390073e-05,
      "loss": 0.2358,
      "step": 4133
    },
    {
      "epoch": 1.5265878877400296,
      "grad_norm": 0.23112213611602783,
      "learning_rate": 9.828796649833723e-05,
      "loss": 0.2131,
      "step": 4134
    },
    {
      "epoch": 1.5269571639586412,
      "grad_norm": 0.24395301938056946,
      "learning_rate": 9.826333292277374e-05,
      "loss": 0.2216,
      "step": 4135
    },
    {
      "epoch": 1.5273264401772526,
      "grad_norm": 0.2670608162879944,
      "learning_rate": 9.823869934721025e-05,
      "loss": 0.1996,
      "step": 4136
    },
    {
      "epoch": 1.527695716395864,
      "grad_norm": 0.2092277556657791,
      "learning_rate": 9.821406577164676e-05,
      "loss": 0.2079,
      "step": 4137
    },
    {
      "epoch": 1.5280649926144756,
      "grad_norm": 0.31602388620376587,
      "learning_rate": 9.818943219608326e-05,
      "loss": 0.2632,
      "step": 4138
    },
    {
      "epoch": 1.5284342688330872,
      "grad_norm": 0.3431403636932373,
      "learning_rate": 9.816479862051978e-05,
      "loss": 0.2848,
      "step": 4139
    },
    {
      "epoch": 1.5288035450516988,
      "grad_norm": 0.2444666624069214,
      "learning_rate": 9.814016504495628e-05,
      "loss": 0.1999,
      "step": 4140
    },
    {
      "epoch": 1.5291728212703102,
      "grad_norm": 0.2779955565929413,
      "learning_rate": 9.811553146939278e-05,
      "loss": 0.2132,
      "step": 4141
    },
    {
      "epoch": 1.5295420974889216,
      "grad_norm": 0.40997451543807983,
      "learning_rate": 9.809089789382929e-05,
      "loss": 0.2804,
      "step": 4142
    },
    {
      "epoch": 1.5299113737075332,
      "grad_norm": 0.23134274780750275,
      "learning_rate": 9.80662643182658e-05,
      "loss": 0.2427,
      "step": 4143
    },
    {
      "epoch": 1.5302806499261448,
      "grad_norm": 0.28587669134140015,
      "learning_rate": 9.804163074270231e-05,
      "loss": 0.2208,
      "step": 4144
    },
    {
      "epoch": 1.5306499261447564,
      "grad_norm": 0.254698783159256,
      "learning_rate": 9.801699716713881e-05,
      "loss": 0.2099,
      "step": 4145
    },
    {
      "epoch": 1.5310192023633677,
      "grad_norm": 0.28558284044265747,
      "learning_rate": 9.799236359157532e-05,
      "loss": 0.2553,
      "step": 4146
    },
    {
      "epoch": 1.5313884785819794,
      "grad_norm": 0.2878744602203369,
      "learning_rate": 9.796773001601183e-05,
      "loss": 0.229,
      "step": 4147
    },
    {
      "epoch": 1.5317577548005907,
      "grad_norm": 0.26767411828041077,
      "learning_rate": 9.794309644044834e-05,
      "loss": 0.2418,
      "step": 4148
    },
    {
      "epoch": 1.5321270310192023,
      "grad_norm": 0.2922110855579376,
      "learning_rate": 9.791846286488484e-05,
      "loss": 0.2326,
      "step": 4149
    },
    {
      "epoch": 1.532496307237814,
      "grad_norm": 0.31100940704345703,
      "learning_rate": 9.789382928932134e-05,
      "loss": 0.2941,
      "step": 4150
    },
    {
      "epoch": 1.532496307237814,
      "eval_loss": 0.26925957202911377,
      "eval_runtime": 5.8619,
      "eval_samples_per_second": 8.53,
      "eval_steps_per_second": 1.194,
      "step": 4150
    },
    {
      "epoch": 1.5328655834564255,
      "grad_norm": 0.2562064230442047,
      "learning_rate": 9.786919571375786e-05,
      "loss": 0.218,
      "step": 4151
    },
    {
      "epoch": 1.533234859675037,
      "grad_norm": 0.28819769620895386,
      "learning_rate": 9.784456213819436e-05,
      "loss": 0.237,
      "step": 4152
    },
    {
      "epoch": 1.5336041358936483,
      "grad_norm": 0.23982426524162292,
      "learning_rate": 9.781992856263087e-05,
      "loss": 0.2009,
      "step": 4153
    },
    {
      "epoch": 1.53397341211226,
      "grad_norm": 0.33476221561431885,
      "learning_rate": 9.779529498706737e-05,
      "loss": 0.2213,
      "step": 4154
    },
    {
      "epoch": 1.5343426883308715,
      "grad_norm": 0.24757163226604462,
      "learning_rate": 9.777066141150389e-05,
      "loss": 0.2138,
      "step": 4155
    },
    {
      "epoch": 1.534711964549483,
      "grad_norm": 0.24515816569328308,
      "learning_rate": 9.774602783594039e-05,
      "loss": 0.2073,
      "step": 4156
    },
    {
      "epoch": 1.5350812407680945,
      "grad_norm": 0.35198482871055603,
      "learning_rate": 9.772139426037689e-05,
      "loss": 0.2536,
      "step": 4157
    },
    {
      "epoch": 1.535450516986706,
      "grad_norm": 0.3029174506664276,
      "learning_rate": 9.76967606848134e-05,
      "loss": 0.2366,
      "step": 4158
    },
    {
      "epoch": 1.5358197932053175,
      "grad_norm": 0.25651854276657104,
      "learning_rate": 9.76721271092499e-05,
      "loss": 0.2193,
      "step": 4159
    },
    {
      "epoch": 1.536189069423929,
      "grad_norm": 0.2608349919319153,
      "learning_rate": 9.764749353368642e-05,
      "loss": 0.23,
      "step": 4160
    },
    {
      "epoch": 1.5365583456425407,
      "grad_norm": 0.2651025354862213,
      "learning_rate": 9.762285995812292e-05,
      "loss": 0.2139,
      "step": 4161
    },
    {
      "epoch": 1.5369276218611523,
      "grad_norm": 0.3118842840194702,
      "learning_rate": 9.759822638255944e-05,
      "loss": 0.2408,
      "step": 4162
    },
    {
      "epoch": 1.5372968980797637,
      "grad_norm": 0.29180708527565,
      "learning_rate": 9.757359280699594e-05,
      "loss": 0.2316,
      "step": 4163
    },
    {
      "epoch": 1.537666174298375,
      "grad_norm": 0.2783593237400055,
      "learning_rate": 9.754895923143245e-05,
      "loss": 0.2431,
      "step": 4164
    },
    {
      "epoch": 1.5380354505169866,
      "grad_norm": 0.2899194061756134,
      "learning_rate": 9.752432565586895e-05,
      "loss": 0.2407,
      "step": 4165
    },
    {
      "epoch": 1.5384047267355982,
      "grad_norm": 0.26361024379730225,
      "learning_rate": 9.749969208030545e-05,
      "loss": 0.248,
      "step": 4166
    },
    {
      "epoch": 1.5387740029542099,
      "grad_norm": 0.2489016205072403,
      "learning_rate": 9.747505850474197e-05,
      "loss": 0.225,
      "step": 4167
    },
    {
      "epoch": 1.5391432791728212,
      "grad_norm": 0.25891217589378357,
      "learning_rate": 9.745042492917847e-05,
      "loss": 0.2284,
      "step": 4168
    },
    {
      "epoch": 1.5395125553914328,
      "grad_norm": 0.2712463140487671,
      "learning_rate": 9.742579135361499e-05,
      "loss": 0.2313,
      "step": 4169
    },
    {
      "epoch": 1.5398818316100442,
      "grad_norm": 0.278853178024292,
      "learning_rate": 9.740115777805149e-05,
      "loss": 0.2315,
      "step": 4170
    },
    {
      "epoch": 1.5402511078286558,
      "grad_norm": 0.27925750613212585,
      "learning_rate": 9.7376524202488e-05,
      "loss": 0.2541,
      "step": 4171
    },
    {
      "epoch": 1.5406203840472674,
      "grad_norm": 0.3000519871711731,
      "learning_rate": 9.73518906269245e-05,
      "loss": 0.2243,
      "step": 4172
    },
    {
      "epoch": 1.540989660265879,
      "grad_norm": 0.2703240215778351,
      "learning_rate": 9.7327257051361e-05,
      "loss": 0.2324,
      "step": 4173
    },
    {
      "epoch": 1.5413589364844904,
      "grad_norm": 0.1922842413187027,
      "learning_rate": 9.730262347579752e-05,
      "loss": 0.1967,
      "step": 4174
    },
    {
      "epoch": 1.5417282127031018,
      "grad_norm": 0.3185964524745941,
      "learning_rate": 9.727798990023402e-05,
      "loss": 0.2745,
      "step": 4175
    },
    {
      "epoch": 1.5420974889217134,
      "grad_norm": 0.268771767616272,
      "learning_rate": 9.725335632467053e-05,
      "loss": 0.2343,
      "step": 4176
    },
    {
      "epoch": 1.542466765140325,
      "grad_norm": 0.31947267055511475,
      "learning_rate": 9.722872274910703e-05,
      "loss": 0.2212,
      "step": 4177
    },
    {
      "epoch": 1.5428360413589366,
      "grad_norm": 0.265396386384964,
      "learning_rate": 9.720408917354355e-05,
      "loss": 0.2335,
      "step": 4178
    },
    {
      "epoch": 1.543205317577548,
      "grad_norm": 0.2607923448085785,
      "learning_rate": 9.717945559798005e-05,
      "loss": 0.2071,
      "step": 4179
    },
    {
      "epoch": 1.5435745937961596,
      "grad_norm": 0.2686561346054077,
      "learning_rate": 9.715482202241656e-05,
      "loss": 0.2312,
      "step": 4180
    },
    {
      "epoch": 1.543943870014771,
      "grad_norm": 0.2784029245376587,
      "learning_rate": 9.713018844685307e-05,
      "loss": 0.2427,
      "step": 4181
    },
    {
      "epoch": 1.5443131462333826,
      "grad_norm": 0.33051469922065735,
      "learning_rate": 9.710555487128957e-05,
      "loss": 0.2471,
      "step": 4182
    },
    {
      "epoch": 1.5446824224519942,
      "grad_norm": 0.2989259958267212,
      "learning_rate": 9.708092129572608e-05,
      "loss": 0.2369,
      "step": 4183
    },
    {
      "epoch": 1.5450516986706058,
      "grad_norm": 0.28553035855293274,
      "learning_rate": 9.705628772016258e-05,
      "loss": 0.2339,
      "step": 4184
    },
    {
      "epoch": 1.5454209748892171,
      "grad_norm": 0.2696689963340759,
      "learning_rate": 9.70316541445991e-05,
      "loss": 0.2452,
      "step": 4185
    },
    {
      "epoch": 1.5457902511078285,
      "grad_norm": 0.27339741587638855,
      "learning_rate": 9.70070205690356e-05,
      "loss": 0.2686,
      "step": 4186
    },
    {
      "epoch": 1.5461595273264401,
      "grad_norm": 0.2567724883556366,
      "learning_rate": 9.698238699347211e-05,
      "loss": 0.2203,
      "step": 4187
    },
    {
      "epoch": 1.5465288035450517,
      "grad_norm": 0.26803749799728394,
      "learning_rate": 9.695775341790861e-05,
      "loss": 0.2262,
      "step": 4188
    },
    {
      "epoch": 1.5468980797636633,
      "grad_norm": 0.28053510189056396,
      "learning_rate": 9.693311984234512e-05,
      "loss": 0.236,
      "step": 4189
    },
    {
      "epoch": 1.5472673559822747,
      "grad_norm": 0.27617505192756653,
      "learning_rate": 9.690848626678163e-05,
      "loss": 0.2536,
      "step": 4190
    },
    {
      "epoch": 1.547636632200886,
      "grad_norm": 0.2775917649269104,
      "learning_rate": 9.688385269121813e-05,
      "loss": 0.2638,
      "step": 4191
    },
    {
      "epoch": 1.5480059084194977,
      "grad_norm": 0.23348627984523773,
      "learning_rate": 9.685921911565465e-05,
      "loss": 0.2177,
      "step": 4192
    },
    {
      "epoch": 1.5483751846381093,
      "grad_norm": 0.3035806715488434,
      "learning_rate": 9.683458554009115e-05,
      "loss": 0.2106,
      "step": 4193
    },
    {
      "epoch": 1.548744460856721,
      "grad_norm": 0.2474101036787033,
      "learning_rate": 9.680995196452766e-05,
      "loss": 0.2106,
      "step": 4194
    },
    {
      "epoch": 1.5491137370753325,
      "grad_norm": 0.2989620268344879,
      "learning_rate": 9.678531838896416e-05,
      "loss": 0.2498,
      "step": 4195
    },
    {
      "epoch": 1.549483013293944,
      "grad_norm": 0.2706199884414673,
      "learning_rate": 9.676068481340068e-05,
      "loss": 0.2492,
      "step": 4196
    },
    {
      "epoch": 1.5498522895125553,
      "grad_norm": 0.2636919617652893,
      "learning_rate": 9.673605123783718e-05,
      "loss": 0.2539,
      "step": 4197
    },
    {
      "epoch": 1.5502215657311669,
      "grad_norm": 0.25813478231430054,
      "learning_rate": 9.671141766227368e-05,
      "loss": 0.2344,
      "step": 4198
    },
    {
      "epoch": 1.5505908419497785,
      "grad_norm": 0.26605188846588135,
      "learning_rate": 9.66867840867102e-05,
      "loss": 0.2125,
      "step": 4199
    },
    {
      "epoch": 1.55096011816839,
      "grad_norm": 0.28155773878097534,
      "learning_rate": 9.66621505111467e-05,
      "loss": 0.2536,
      "step": 4200
    },
    {
      "epoch": 1.55096011816839,
      "eval_loss": 0.26491570472717285,
      "eval_runtime": 5.856,
      "eval_samples_per_second": 8.538,
      "eval_steps_per_second": 1.195,
      "step": 4200
    },
    {
      "epoch": 1.5513293943870015,
      "grad_norm": 0.27864086627960205,
      "learning_rate": 9.663751693558321e-05,
      "loss": 0.3091,
      "step": 4201
    },
    {
      "epoch": 1.5516986706056128,
      "grad_norm": 0.27152353525161743,
      "learning_rate": 9.661288336001971e-05,
      "loss": 0.2605,
      "step": 4202
    },
    {
      "epoch": 1.5520679468242244,
      "grad_norm": 0.27896204590797424,
      "learning_rate": 9.658824978445623e-05,
      "loss": 0.2688,
      "step": 4203
    },
    {
      "epoch": 1.552437223042836,
      "grad_norm": 0.2809586822986603,
      "learning_rate": 9.656361620889273e-05,
      "loss": 0.2211,
      "step": 4204
    },
    {
      "epoch": 1.5528064992614476,
      "grad_norm": 0.3071771264076233,
      "learning_rate": 9.653898263332923e-05,
      "loss": 0.25,
      "step": 4205
    },
    {
      "epoch": 1.553175775480059,
      "grad_norm": 0.23369184136390686,
      "learning_rate": 9.651434905776574e-05,
      "loss": 0.219,
      "step": 4206
    },
    {
      "epoch": 1.5535450516986706,
      "grad_norm": 0.27824506163597107,
      "learning_rate": 9.648971548220224e-05,
      "loss": 0.248,
      "step": 4207
    },
    {
      "epoch": 1.553914327917282,
      "grad_norm": 0.3109837472438812,
      "learning_rate": 9.646508190663876e-05,
      "loss": 0.2723,
      "step": 4208
    },
    {
      "epoch": 1.5542836041358936,
      "grad_norm": 0.322033166885376,
      "learning_rate": 9.644044833107526e-05,
      "loss": 0.2423,
      "step": 4209
    },
    {
      "epoch": 1.5546528803545052,
      "grad_norm": 0.2386239618062973,
      "learning_rate": 9.641581475551177e-05,
      "loss": 0.2235,
      "step": 4210
    },
    {
      "epoch": 1.5550221565731168,
      "grad_norm": 0.29537051916122437,
      "learning_rate": 9.639118117994827e-05,
      "loss": 0.1921,
      "step": 4211
    },
    {
      "epoch": 1.5553914327917282,
      "grad_norm": 0.2355179637670517,
      "learning_rate": 9.636654760438479e-05,
      "loss": 0.2129,
      "step": 4212
    },
    {
      "epoch": 1.5557607090103396,
      "grad_norm": 0.26878058910369873,
      "learning_rate": 9.634191402882129e-05,
      "loss": 0.2275,
      "step": 4213
    },
    {
      "epoch": 1.5561299852289512,
      "grad_norm": 0.5514440536499023,
      "learning_rate": 9.631728045325779e-05,
      "loss": 0.2754,
      "step": 4214
    },
    {
      "epoch": 1.5564992614475628,
      "grad_norm": 0.2654857039451599,
      "learning_rate": 9.62926468776943e-05,
      "loss": 0.2261,
      "step": 4215
    },
    {
      "epoch": 1.5568685376661744,
      "grad_norm": 0.25727444887161255,
      "learning_rate": 9.626801330213081e-05,
      "loss": 0.2622,
      "step": 4216
    },
    {
      "epoch": 1.5572378138847858,
      "grad_norm": 0.2708493769168854,
      "learning_rate": 9.624337972656732e-05,
      "loss": 0.2145,
      "step": 4217
    },
    {
      "epoch": 1.5576070901033974,
      "grad_norm": 0.30159351229667664,
      "learning_rate": 9.621874615100382e-05,
      "loss": 0.2645,
      "step": 4218
    },
    {
      "epoch": 1.5579763663220088,
      "grad_norm": 0.29582643508911133,
      "learning_rate": 9.619411257544034e-05,
      "loss": 0.3205,
      "step": 4219
    },
    {
      "epoch": 1.5583456425406204,
      "grad_norm": 0.2497013956308365,
      "learning_rate": 9.616947899987684e-05,
      "loss": 0.2433,
      "step": 4220
    },
    {
      "epoch": 1.558714918759232,
      "grad_norm": 0.2375938892364502,
      "learning_rate": 9.614484542431334e-05,
      "loss": 0.2153,
      "step": 4221
    },
    {
      "epoch": 1.5590841949778436,
      "grad_norm": 0.2669118046760559,
      "learning_rate": 9.612021184874985e-05,
      "loss": 0.2114,
      "step": 4222
    },
    {
      "epoch": 1.559453471196455,
      "grad_norm": 0.3310312330722809,
      "learning_rate": 9.609557827318636e-05,
      "loss": 0.2379,
      "step": 4223
    },
    {
      "epoch": 1.5598227474150663,
      "grad_norm": 0.2724936008453369,
      "learning_rate": 9.607094469762287e-05,
      "loss": 0.2079,
      "step": 4224
    },
    {
      "epoch": 1.560192023633678,
      "grad_norm": 0.326214075088501,
      "learning_rate": 9.604631112205937e-05,
      "loss": 0.233,
      "step": 4225
    },
    {
      "epoch": 1.5605612998522895,
      "grad_norm": 0.2730662524700165,
      "learning_rate": 9.602167754649589e-05,
      "loss": 0.2331,
      "step": 4226
    },
    {
      "epoch": 1.5609305760709011,
      "grad_norm": 0.2563663721084595,
      "learning_rate": 9.599704397093239e-05,
      "loss": 0.2211,
      "step": 4227
    },
    {
      "epoch": 1.5612998522895125,
      "grad_norm": 0.24609240889549255,
      "learning_rate": 9.597241039536889e-05,
      "loss": 0.2223,
      "step": 4228
    },
    {
      "epoch": 1.5616691285081241,
      "grad_norm": 0.2626035213470459,
      "learning_rate": 9.59477768198054e-05,
      "loss": 0.2328,
      "step": 4229
    },
    {
      "epoch": 1.5620384047267355,
      "grad_norm": 0.24416084587574005,
      "learning_rate": 9.59231432442419e-05,
      "loss": 0.2087,
      "step": 4230
    },
    {
      "epoch": 1.562407680945347,
      "grad_norm": 0.25252941250801086,
      "learning_rate": 9.589850966867842e-05,
      "loss": 0.2319,
      "step": 4231
    },
    {
      "epoch": 1.5627769571639587,
      "grad_norm": 0.2506401538848877,
      "learning_rate": 9.587387609311492e-05,
      "loss": 0.2246,
      "step": 4232
    },
    {
      "epoch": 1.5631462333825703,
      "grad_norm": 0.29225239157676697,
      "learning_rate": 9.584924251755143e-05,
      "loss": 0.234,
      "step": 4233
    },
    {
      "epoch": 1.5635155096011817,
      "grad_norm": 0.2837737500667572,
      "learning_rate": 9.582460894198794e-05,
      "loss": 0.2275,
      "step": 4234
    },
    {
      "epoch": 1.563884785819793,
      "grad_norm": 0.26541614532470703,
      "learning_rate": 9.579997536642445e-05,
      "loss": 0.232,
      "step": 4235
    },
    {
      "epoch": 1.5642540620384047,
      "grad_norm": 0.24503584206104279,
      "learning_rate": 9.577534179086095e-05,
      "loss": 0.2246,
      "step": 4236
    },
    {
      "epoch": 1.5646233382570163,
      "grad_norm": 0.282164067029953,
      "learning_rate": 9.575070821529745e-05,
      "loss": 0.2383,
      "step": 4237
    },
    {
      "epoch": 1.5649926144756279,
      "grad_norm": 0.2575039565563202,
      "learning_rate": 9.572607463973397e-05,
      "loss": 0.2191,
      "step": 4238
    },
    {
      "epoch": 1.5653618906942393,
      "grad_norm": 0.17597277462482452,
      "learning_rate": 9.570144106417047e-05,
      "loss": 0.1631,
      "step": 4239
    },
    {
      "epoch": 1.5657311669128509,
      "grad_norm": 0.3703117370605469,
      "learning_rate": 9.567680748860698e-05,
      "loss": 0.2321,
      "step": 4240
    },
    {
      "epoch": 1.5661004431314622,
      "grad_norm": 0.24686135351657867,
      "learning_rate": 9.565217391304348e-05,
      "loss": 0.2189,
      "step": 4241
    },
    {
      "epoch": 1.5664697193500738,
      "grad_norm": 0.28961747884750366,
      "learning_rate": 9.562754033748e-05,
      "loss": 0.2488,
      "step": 4242
    },
    {
      "epoch": 1.5668389955686854,
      "grad_norm": 0.25977790355682373,
      "learning_rate": 9.56029067619165e-05,
      "loss": 0.2177,
      "step": 4243
    },
    {
      "epoch": 1.567208271787297,
      "grad_norm": 0.27536264061927795,
      "learning_rate": 9.5578273186353e-05,
      "loss": 0.2142,
      "step": 4244
    },
    {
      "epoch": 1.5675775480059084,
      "grad_norm": 0.3315596580505371,
      "learning_rate": 9.555363961078951e-05,
      "loss": 0.2689,
      "step": 4245
    },
    {
      "epoch": 1.5679468242245198,
      "grad_norm": 0.35292044281959534,
      "learning_rate": 9.552900603522602e-05,
      "loss": 0.2434,
      "step": 4246
    },
    {
      "epoch": 1.5683161004431314,
      "grad_norm": 0.29545414447784424,
      "learning_rate": 9.550437245966253e-05,
      "loss": 0.2612,
      "step": 4247
    },
    {
      "epoch": 1.568685376661743,
      "grad_norm": 0.2903496026992798,
      "learning_rate": 9.547973888409903e-05,
      "loss": 0.2248,
      "step": 4248
    },
    {
      "epoch": 1.5690546528803546,
      "grad_norm": 0.3032342493534088,
      "learning_rate": 9.545510530853555e-05,
      "loss": 0.2754,
      "step": 4249
    },
    {
      "epoch": 1.569423929098966,
      "grad_norm": 0.23315784335136414,
      "learning_rate": 9.543047173297205e-05,
      "loss": 0.1979,
      "step": 4250
    },
    {
      "epoch": 1.569423929098966,
      "eval_loss": 0.26710450649261475,
      "eval_runtime": 5.8493,
      "eval_samples_per_second": 8.548,
      "eval_steps_per_second": 1.197,
      "step": 4250
    },
    {
      "epoch": 1.5697932053175776,
      "grad_norm": 0.24444057047367096,
      "learning_rate": 9.540583815740856e-05,
      "loss": 0.2093,
      "step": 4251
    },
    {
      "epoch": 1.570162481536189,
      "grad_norm": 0.30540236830711365,
      "learning_rate": 9.538120458184506e-05,
      "loss": 0.2497,
      "step": 4252
    },
    {
      "epoch": 1.5705317577548006,
      "grad_norm": 0.3088052272796631,
      "learning_rate": 9.535657100628156e-05,
      "loss": 0.2237,
      "step": 4253
    },
    {
      "epoch": 1.5709010339734122,
      "grad_norm": 0.3352174460887909,
      "learning_rate": 9.533193743071808e-05,
      "loss": 0.2535,
      "step": 4254
    },
    {
      "epoch": 1.5712703101920238,
      "grad_norm": 0.2540420889854431,
      "learning_rate": 9.530730385515458e-05,
      "loss": 0.2359,
      "step": 4255
    },
    {
      "epoch": 1.5716395864106352,
      "grad_norm": 0.2622866630554199,
      "learning_rate": 9.52826702795911e-05,
      "loss": 0.2152,
      "step": 4256
    },
    {
      "epoch": 1.5720088626292466,
      "grad_norm": 0.30007508397102356,
      "learning_rate": 9.52580367040276e-05,
      "loss": 0.2747,
      "step": 4257
    },
    {
      "epoch": 1.5723781388478582,
      "grad_norm": 0.3477882146835327,
      "learning_rate": 9.523340312846411e-05,
      "loss": 0.2534,
      "step": 4258
    },
    {
      "epoch": 1.5727474150664698,
      "grad_norm": 0.26416993141174316,
      "learning_rate": 9.520876955290061e-05,
      "loss": 0.184,
      "step": 4259
    },
    {
      "epoch": 1.5731166912850814,
      "grad_norm": 0.29270222783088684,
      "learning_rate": 9.518413597733711e-05,
      "loss": 0.2596,
      "step": 4260
    },
    {
      "epoch": 1.5734859675036927,
      "grad_norm": 0.2309563308954239,
      "learning_rate": 9.515950240177363e-05,
      "loss": 0.1776,
      "step": 4261
    },
    {
      "epoch": 1.5738552437223041,
      "grad_norm": 0.3029315173625946,
      "learning_rate": 9.513486882621013e-05,
      "loss": 0.2582,
      "step": 4262
    },
    {
      "epoch": 1.5742245199409157,
      "grad_norm": 0.27996766567230225,
      "learning_rate": 9.511023525064664e-05,
      "loss": 0.224,
      "step": 4263
    },
    {
      "epoch": 1.5745937961595273,
      "grad_norm": 0.25720569491386414,
      "learning_rate": 9.508560167508314e-05,
      "loss": 0.203,
      "step": 4264
    },
    {
      "epoch": 1.574963072378139,
      "grad_norm": 0.3253585696220398,
      "learning_rate": 9.506096809951966e-05,
      "loss": 0.2461,
      "step": 4265
    },
    {
      "epoch": 1.5753323485967505,
      "grad_norm": 0.30563682317733765,
      "learning_rate": 9.503633452395616e-05,
      "loss": 0.2321,
      "step": 4266
    },
    {
      "epoch": 1.575701624815362,
      "grad_norm": 0.26521727442741394,
      "learning_rate": 9.501170094839267e-05,
      "loss": 0.2599,
      "step": 4267
    },
    {
      "epoch": 1.5760709010339733,
      "grad_norm": 0.3069210350513458,
      "learning_rate": 9.498706737282918e-05,
      "loss": 0.245,
      "step": 4268
    },
    {
      "epoch": 1.576440177252585,
      "grad_norm": 0.2539284825325012,
      "learning_rate": 9.496243379726568e-05,
      "loss": 0.1993,
      "step": 4269
    },
    {
      "epoch": 1.5768094534711965,
      "grad_norm": 0.2633403539657593,
      "learning_rate": 9.493780022170219e-05,
      "loss": 0.2012,
      "step": 4270
    },
    {
      "epoch": 1.577178729689808,
      "grad_norm": 0.30276525020599365,
      "learning_rate": 9.491316664613869e-05,
      "loss": 0.2738,
      "step": 4271
    },
    {
      "epoch": 1.5775480059084195,
      "grad_norm": 0.24089419841766357,
      "learning_rate": 9.488853307057521e-05,
      "loss": 0.2213,
      "step": 4272
    },
    {
      "epoch": 1.5779172821270309,
      "grad_norm": 0.29699668288230896,
      "learning_rate": 9.486389949501171e-05,
      "loss": 0.2707,
      "step": 4273
    },
    {
      "epoch": 1.5782865583456425,
      "grad_norm": 0.2622053623199463,
      "learning_rate": 9.483926591944822e-05,
      "loss": 0.2418,
      "step": 4274
    },
    {
      "epoch": 1.578655834564254,
      "grad_norm": 0.29047226905822754,
      "learning_rate": 9.481463234388472e-05,
      "loss": 0.2224,
      "step": 4275
    },
    {
      "epoch": 1.5790251107828657,
      "grad_norm": 0.3342186510562897,
      "learning_rate": 9.478999876832122e-05,
      "loss": 0.2878,
      "step": 4276
    },
    {
      "epoch": 1.579394387001477,
      "grad_norm": 0.24895580112934113,
      "learning_rate": 9.476536519275774e-05,
      "loss": 0.1882,
      "step": 4277
    },
    {
      "epoch": 1.5797636632200887,
      "grad_norm": 0.3772604763507843,
      "learning_rate": 9.474073161719424e-05,
      "loss": 0.2985,
      "step": 4278
    },
    {
      "epoch": 1.5801329394387,
      "grad_norm": 0.260893315076828,
      "learning_rate": 9.471609804163076e-05,
      "loss": 0.2064,
      "step": 4279
    },
    {
      "epoch": 1.5805022156573116,
      "grad_norm": 0.27421626448631287,
      "learning_rate": 9.469146446606726e-05,
      "loss": 0.2387,
      "step": 4280
    },
    {
      "epoch": 1.5808714918759232,
      "grad_norm": 0.27801311016082764,
      "learning_rate": 9.466683089050377e-05,
      "loss": 0.2703,
      "step": 4281
    },
    {
      "epoch": 1.5812407680945348,
      "grad_norm": 0.2561623156070709,
      "learning_rate": 9.464219731494027e-05,
      "loss": 0.2304,
      "step": 4282
    },
    {
      "epoch": 1.5816100443131462,
      "grad_norm": 0.26812848448753357,
      "learning_rate": 9.461756373937679e-05,
      "loss": 0.231,
      "step": 4283
    },
    {
      "epoch": 1.5819793205317576,
      "grad_norm": 0.3009648025035858,
      "learning_rate": 9.459293016381329e-05,
      "loss": 0.2445,
      "step": 4284
    },
    {
      "epoch": 1.5823485967503692,
      "grad_norm": 0.29099470376968384,
      "learning_rate": 9.456829658824979e-05,
      "loss": 0.215,
      "step": 4285
    },
    {
      "epoch": 1.5827178729689808,
      "grad_norm": 0.2548910677433014,
      "learning_rate": 9.45436630126863e-05,
      "loss": 0.2143,
      "step": 4286
    },
    {
      "epoch": 1.5830871491875924,
      "grad_norm": 0.5736655592918396,
      "learning_rate": 9.45190294371228e-05,
      "loss": 0.2081,
      "step": 4287
    },
    {
      "epoch": 1.5834564254062038,
      "grad_norm": 0.2578420341014862,
      "learning_rate": 9.449439586155932e-05,
      "loss": 0.215,
      "step": 4288
    },
    {
      "epoch": 1.5838257016248154,
      "grad_norm": 0.3453446924686432,
      "learning_rate": 9.446976228599582e-05,
      "loss": 0.2783,
      "step": 4289
    },
    {
      "epoch": 1.5841949778434268,
      "grad_norm": 0.31030216813087463,
      "learning_rate": 9.444512871043233e-05,
      "loss": 0.2469,
      "step": 4290
    },
    {
      "epoch": 1.5845642540620384,
      "grad_norm": 0.283894807100296,
      "learning_rate": 9.442049513486884e-05,
      "loss": 0.2173,
      "step": 4291
    },
    {
      "epoch": 1.58493353028065,
      "grad_norm": 0.20466217398643494,
      "learning_rate": 9.439586155930534e-05,
      "loss": 0.1964,
      "step": 4292
    },
    {
      "epoch": 1.5853028064992616,
      "grad_norm": 0.33935806155204773,
      "learning_rate": 9.437122798374185e-05,
      "loss": 0.2731,
      "step": 4293
    },
    {
      "epoch": 1.585672082717873,
      "grad_norm": 0.3064669370651245,
      "learning_rate": 9.434659440817835e-05,
      "loss": 0.2202,
      "step": 4294
    },
    {
      "epoch": 1.5860413589364843,
      "grad_norm": 0.31215453147888184,
      "learning_rate": 9.432196083261487e-05,
      "loss": 0.2566,
      "step": 4295
    },
    {
      "epoch": 1.586410635155096,
      "grad_norm": 0.24009639024734497,
      "learning_rate": 9.429732725705137e-05,
      "loss": 0.1968,
      "step": 4296
    },
    {
      "epoch": 1.5867799113737076,
      "grad_norm": 0.34021276235580444,
      "learning_rate": 9.427269368148787e-05,
      "loss": 0.302,
      "step": 4297
    },
    {
      "epoch": 1.5871491875923192,
      "grad_norm": 0.2594415545463562,
      "learning_rate": 9.424806010592437e-05,
      "loss": 0.1936,
      "step": 4298
    },
    {
      "epoch": 1.5875184638109305,
      "grad_norm": 0.26243454217910767,
      "learning_rate": 9.422342653036089e-05,
      "loss": 0.2316,
      "step": 4299
    },
    {
      "epoch": 1.5878877400295421,
      "grad_norm": 0.25612276792526245,
      "learning_rate": 9.419879295479739e-05,
      "loss": 0.2599,
      "step": 4300
    },
    {
      "epoch": 1.5878877400295421,
      "eval_loss": 0.2657601833343506,
      "eval_runtime": 5.8652,
      "eval_samples_per_second": 8.525,
      "eval_steps_per_second": 1.193,
      "step": 4300
    },
    {
      "epoch": 1.5882570162481535,
      "grad_norm": 0.2628798186779022,
      "learning_rate": 9.41741593792339e-05,
      "loss": 0.2186,
      "step": 4301
    },
    {
      "epoch": 1.5886262924667651,
      "grad_norm": 0.3352354168891907,
      "learning_rate": 9.41495258036704e-05,
      "loss": 0.2316,
      "step": 4302
    },
    {
      "epoch": 1.5889955686853767,
      "grad_norm": 0.2656700313091278,
      "learning_rate": 9.41248922281069e-05,
      "loss": 0.2315,
      "step": 4303
    },
    {
      "epoch": 1.5893648449039883,
      "grad_norm": 0.291464239358902,
      "learning_rate": 9.410025865254342e-05,
      "loss": 0.2204,
      "step": 4304
    },
    {
      "epoch": 1.5897341211225997,
      "grad_norm": 0.2992652654647827,
      "learning_rate": 9.407562507697992e-05,
      "loss": 0.2159,
      "step": 4305
    },
    {
      "epoch": 1.590103397341211,
      "grad_norm": 0.2757866680622101,
      "learning_rate": 9.405099150141643e-05,
      "loss": 0.2129,
      "step": 4306
    },
    {
      "epoch": 1.5904726735598227,
      "grad_norm": 0.2500758171081543,
      "learning_rate": 9.402635792585293e-05,
      "loss": 0.2006,
      "step": 4307
    },
    {
      "epoch": 1.5908419497784343,
      "grad_norm": 0.2814030051231384,
      "learning_rate": 9.400172435028945e-05,
      "loss": 0.2456,
      "step": 4308
    },
    {
      "epoch": 1.591211225997046,
      "grad_norm": 0.37658214569091797,
      "learning_rate": 9.397709077472595e-05,
      "loss": 0.2899,
      "step": 4309
    },
    {
      "epoch": 1.5915805022156573,
      "grad_norm": 0.29137834906578064,
      "learning_rate": 9.395245719916245e-05,
      "loss": 0.2364,
      "step": 4310
    },
    {
      "epoch": 1.5919497784342689,
      "grad_norm": 0.28656837344169617,
      "learning_rate": 9.392782362359897e-05,
      "loss": 0.2448,
      "step": 4311
    },
    {
      "epoch": 1.5923190546528803,
      "grad_norm": 0.21193453669548035,
      "learning_rate": 9.390319004803547e-05,
      "loss": 0.1986,
      "step": 4312
    },
    {
      "epoch": 1.5926883308714919,
      "grad_norm": 0.22614000737667084,
      "learning_rate": 9.387855647247198e-05,
      "loss": 0.1926,
      "step": 4313
    },
    {
      "epoch": 1.5930576070901035,
      "grad_norm": 0.34158673882484436,
      "learning_rate": 9.385392289690848e-05,
      "loss": 0.2315,
      "step": 4314
    },
    {
      "epoch": 1.593426883308715,
      "grad_norm": 0.274847149848938,
      "learning_rate": 9.3829289321345e-05,
      "loss": 0.2177,
      "step": 4315
    },
    {
      "epoch": 1.5937961595273265,
      "grad_norm": 0.2810226380825043,
      "learning_rate": 9.38046557457815e-05,
      "loss": 0.2522,
      "step": 4316
    },
    {
      "epoch": 1.5941654357459378,
      "grad_norm": 0.33540499210357666,
      "learning_rate": 9.378002217021801e-05,
      "loss": 0.2631,
      "step": 4317
    },
    {
      "epoch": 1.5945347119645494,
      "grad_norm": 0.3179091215133667,
      "learning_rate": 9.375538859465451e-05,
      "loss": 0.2796,
      "step": 4318
    },
    {
      "epoch": 1.594903988183161,
      "grad_norm": 0.2694183588027954,
      "learning_rate": 9.373075501909102e-05,
      "loss": 0.2675,
      "step": 4319
    },
    {
      "epoch": 1.5952732644017726,
      "grad_norm": 0.26848018169403076,
      "learning_rate": 9.370612144352753e-05,
      "loss": 0.2381,
      "step": 4320
    },
    {
      "epoch": 1.595642540620384,
      "grad_norm": 0.27807292342185974,
      "learning_rate": 9.368148786796403e-05,
      "loss": 0.2211,
      "step": 4321
    },
    {
      "epoch": 1.5960118168389956,
      "grad_norm": 0.37645626068115234,
      "learning_rate": 9.365685429240055e-05,
      "loss": 0.3056,
      "step": 4322
    },
    {
      "epoch": 1.596381093057607,
      "grad_norm": 0.2805720567703247,
      "learning_rate": 9.363222071683705e-05,
      "loss": 0.226,
      "step": 4323
    },
    {
      "epoch": 1.5967503692762186,
      "grad_norm": 0.2529265284538269,
      "learning_rate": 9.360758714127356e-05,
      "loss": 0.2304,
      "step": 4324
    },
    {
      "epoch": 1.5971196454948302,
      "grad_norm": 0.2973952293395996,
      "learning_rate": 9.358295356571006e-05,
      "loss": 0.2078,
      "step": 4325
    },
    {
      "epoch": 1.5974889217134418,
      "grad_norm": 0.3685002624988556,
      "learning_rate": 9.355831999014656e-05,
      "loss": 0.2365,
      "step": 4326
    },
    {
      "epoch": 1.5978581979320532,
      "grad_norm": 0.30593231320381165,
      "learning_rate": 9.353368641458308e-05,
      "loss": 0.2397,
      "step": 4327
    },
    {
      "epoch": 1.5982274741506646,
      "grad_norm": 0.287945032119751,
      "learning_rate": 9.350905283901958e-05,
      "loss": 0.2171,
      "step": 4328
    },
    {
      "epoch": 1.5985967503692762,
      "grad_norm": 0.353727787733078,
      "learning_rate": 9.34844192634561e-05,
      "loss": 0.2962,
      "step": 4329
    },
    {
      "epoch": 1.5989660265878878,
      "grad_norm": 0.27644434571266174,
      "learning_rate": 9.34597856878926e-05,
      "loss": 0.2336,
      "step": 4330
    },
    {
      "epoch": 1.5993353028064994,
      "grad_norm": 0.30194446444511414,
      "learning_rate": 9.343515211232911e-05,
      "loss": 0.2742,
      "step": 4331
    },
    {
      "epoch": 1.5997045790251108,
      "grad_norm": 0.2946053743362427,
      "learning_rate": 9.341051853676561e-05,
      "loss": 0.2354,
      "step": 4332
    },
    {
      "epoch": 1.6000738552437221,
      "grad_norm": 0.28401628136634827,
      "learning_rate": 9.338588496120213e-05,
      "loss": 0.2406,
      "step": 4333
    },
    {
      "epoch": 1.6004431314623337,
      "grad_norm": 0.27871960401535034,
      "learning_rate": 9.336125138563863e-05,
      "loss": 0.2448,
      "step": 4334
    },
    {
      "epoch": 1.6008124076809453,
      "grad_norm": 0.29729315638542175,
      "learning_rate": 9.333661781007513e-05,
      "loss": 0.2727,
      "step": 4335
    },
    {
      "epoch": 1.601181683899557,
      "grad_norm": 0.2805337905883789,
      "learning_rate": 9.331198423451164e-05,
      "loss": 0.2661,
      "step": 4336
    },
    {
      "epoch": 1.6015509601181686,
      "grad_norm": 0.26619741320610046,
      "learning_rate": 9.328735065894814e-05,
      "loss": 0.2459,
      "step": 4337
    },
    {
      "epoch": 1.60192023633678,
      "grad_norm": 0.2586030662059784,
      "learning_rate": 9.326271708338466e-05,
      "loss": 0.2413,
      "step": 4338
    },
    {
      "epoch": 1.6022895125553913,
      "grad_norm": 0.28171306848526,
      "learning_rate": 9.323808350782116e-05,
      "loss": 0.2425,
      "step": 4339
    },
    {
      "epoch": 1.602658788774003,
      "grad_norm": 0.2730322480201721,
      "learning_rate": 9.321344993225767e-05,
      "loss": 0.231,
      "step": 4340
    },
    {
      "epoch": 1.6030280649926145,
      "grad_norm": 0.2533946633338928,
      "learning_rate": 9.318881635669417e-05,
      "loss": 0.2484,
      "step": 4341
    },
    {
      "epoch": 1.6033973412112261,
      "grad_norm": 0.3294993042945862,
      "learning_rate": 9.316418278113068e-05,
      "loss": 0.2564,
      "step": 4342
    },
    {
      "epoch": 1.6037666174298375,
      "grad_norm": 0.2796359360218048,
      "learning_rate": 9.313954920556719e-05,
      "loss": 0.2769,
      "step": 4343
    },
    {
      "epoch": 1.6041358936484489,
      "grad_norm": 0.3053548336029053,
      "learning_rate": 9.311491563000369e-05,
      "loss": 0.2712,
      "step": 4344
    },
    {
      "epoch": 1.6045051698670605,
      "grad_norm": 0.23783549666404724,
      "learning_rate": 9.30902820544402e-05,
      "loss": 0.2177,
      "step": 4345
    },
    {
      "epoch": 1.604874446085672,
      "grad_norm": 0.2943350076675415,
      "learning_rate": 9.306564847887671e-05,
      "loss": 0.2202,
      "step": 4346
    },
    {
      "epoch": 1.6052437223042837,
      "grad_norm": 0.29149776697158813,
      "learning_rate": 9.304101490331322e-05,
      "loss": 0.231,
      "step": 4347
    },
    {
      "epoch": 1.605612998522895,
      "grad_norm": 0.2750820219516754,
      "learning_rate": 9.301638132774972e-05,
      "loss": 0.2227,
      "step": 4348
    },
    {
      "epoch": 1.6059822747415067,
      "grad_norm": 0.2783328890800476,
      "learning_rate": 9.299174775218624e-05,
      "loss": 0.223,
      "step": 4349
    },
    {
      "epoch": 1.606351550960118,
      "grad_norm": 0.23471657931804657,
      "learning_rate": 9.296711417662274e-05,
      "loss": 0.2069,
      "step": 4350
    },
    {
      "epoch": 1.606351550960118,
      "eval_loss": 0.2663235068321228,
      "eval_runtime": 5.8608,
      "eval_samples_per_second": 8.531,
      "eval_steps_per_second": 1.194,
      "step": 4350
    },
    {
      "epoch": 1.6067208271787297,
      "grad_norm": 0.3521765470504761,
      "learning_rate": 9.294248060105924e-05,
      "loss": 0.2556,
      "step": 4351
    },
    {
      "epoch": 1.6070901033973413,
      "grad_norm": 0.307546466588974,
      "learning_rate": 9.291784702549575e-05,
      "loss": 0.2705,
      "step": 4352
    },
    {
      "epoch": 1.6074593796159529,
      "grad_norm": 0.31380757689476013,
      "learning_rate": 9.289321344993226e-05,
      "loss": 0.2548,
      "step": 4353
    },
    {
      "epoch": 1.6078286558345642,
      "grad_norm": 0.20991113781929016,
      "learning_rate": 9.286857987436877e-05,
      "loss": 0.1966,
      "step": 4354
    },
    {
      "epoch": 1.6081979320531756,
      "grad_norm": 0.2921874225139618,
      "learning_rate": 9.284394629880527e-05,
      "loss": 0.2539,
      "step": 4355
    },
    {
      "epoch": 1.6085672082717872,
      "grad_norm": 0.2973816692829132,
      "learning_rate": 9.281931272324179e-05,
      "loss": 0.2543,
      "step": 4356
    },
    {
      "epoch": 1.6089364844903988,
      "grad_norm": 0.23171468079090118,
      "learning_rate": 9.279467914767829e-05,
      "loss": 0.2032,
      "step": 4357
    },
    {
      "epoch": 1.6093057607090104,
      "grad_norm": 0.28898993134498596,
      "learning_rate": 9.277004557211479e-05,
      "loss": 0.2166,
      "step": 4358
    },
    {
      "epoch": 1.6096750369276218,
      "grad_norm": 0.3716563284397125,
      "learning_rate": 9.27454119965513e-05,
      "loss": 0.2859,
      "step": 4359
    },
    {
      "epoch": 1.6100443131462334,
      "grad_norm": 0.3015817403793335,
      "learning_rate": 9.27207784209878e-05,
      "loss": 0.2055,
      "step": 4360
    },
    {
      "epoch": 1.6104135893648448,
      "grad_norm": 0.26564592123031616,
      "learning_rate": 9.269614484542432e-05,
      "loss": 0.1959,
      "step": 4361
    },
    {
      "epoch": 1.6107828655834564,
      "grad_norm": 0.3186861574649811,
      "learning_rate": 9.267151126986082e-05,
      "loss": 0.26,
      "step": 4362
    },
    {
      "epoch": 1.611152141802068,
      "grad_norm": 0.28396859765052795,
      "learning_rate": 9.264687769429733e-05,
      "loss": 0.2461,
      "step": 4363
    },
    {
      "epoch": 1.6115214180206796,
      "grad_norm": 0.2832779288291931,
      "learning_rate": 9.262224411873384e-05,
      "loss": 0.2233,
      "step": 4364
    },
    {
      "epoch": 1.611890694239291,
      "grad_norm": 0.28067728877067566,
      "learning_rate": 9.259761054317035e-05,
      "loss": 0.243,
      "step": 4365
    },
    {
      "epoch": 1.6122599704579024,
      "grad_norm": 0.29506027698516846,
      "learning_rate": 9.257297696760685e-05,
      "loss": 0.2475,
      "step": 4366
    },
    {
      "epoch": 1.612629246676514,
      "grad_norm": 0.2841379940509796,
      "learning_rate": 9.254834339204335e-05,
      "loss": 0.243,
      "step": 4367
    },
    {
      "epoch": 1.6129985228951256,
      "grad_norm": 0.28466540575027466,
      "learning_rate": 9.252370981647987e-05,
      "loss": 0.2117,
      "step": 4368
    },
    {
      "epoch": 1.6133677991137372,
      "grad_norm": 0.32267874479293823,
      "learning_rate": 9.249907624091637e-05,
      "loss": 0.27,
      "step": 4369
    },
    {
      "epoch": 1.6137370753323486,
      "grad_norm": 0.2639078199863434,
      "learning_rate": 9.247444266535288e-05,
      "loss": 0.2213,
      "step": 4370
    },
    {
      "epoch": 1.6141063515509602,
      "grad_norm": 0.3134765625,
      "learning_rate": 9.244980908978938e-05,
      "loss": 0.2457,
      "step": 4371
    },
    {
      "epoch": 1.6144756277695715,
      "grad_norm": 0.2824104130268097,
      "learning_rate": 9.24251755142259e-05,
      "loss": 0.2116,
      "step": 4372
    },
    {
      "epoch": 1.6148449039881831,
      "grad_norm": 0.21312622725963593,
      "learning_rate": 9.24005419386624e-05,
      "loss": 0.197,
      "step": 4373
    },
    {
      "epoch": 1.6152141802067947,
      "grad_norm": 0.2423839569091797,
      "learning_rate": 9.23759083630989e-05,
      "loss": 0.2054,
      "step": 4374
    },
    {
      "epoch": 1.6155834564254064,
      "grad_norm": 0.2941286861896515,
      "learning_rate": 9.235127478753542e-05,
      "loss": 0.2178,
      "step": 4375
    },
    {
      "epoch": 1.6159527326440177,
      "grad_norm": 0.28134721517562866,
      "learning_rate": 9.232664121197192e-05,
      "loss": 0.2427,
      "step": 4376
    },
    {
      "epoch": 1.6163220088626291,
      "grad_norm": 0.2671574354171753,
      "learning_rate": 9.230200763640843e-05,
      "loss": 0.2322,
      "step": 4377
    },
    {
      "epoch": 1.6166912850812407,
      "grad_norm": 0.2510659992694855,
      "learning_rate": 9.227737406084493e-05,
      "loss": 0.2012,
      "step": 4378
    },
    {
      "epoch": 1.6170605612998523,
      "grad_norm": 0.28398624062538147,
      "learning_rate": 9.225274048528145e-05,
      "loss": 0.2313,
      "step": 4379
    },
    {
      "epoch": 1.617429837518464,
      "grad_norm": 0.260732501745224,
      "learning_rate": 9.222810690971795e-05,
      "loss": 0.2456,
      "step": 4380
    },
    {
      "epoch": 1.6177991137370753,
      "grad_norm": 0.255723774433136,
      "learning_rate": 9.220347333415445e-05,
      "loss": 0.1865,
      "step": 4381
    },
    {
      "epoch": 1.618168389955687,
      "grad_norm": 0.29838570952415466,
      "learning_rate": 9.217883975859096e-05,
      "loss": 0.2384,
      "step": 4382
    },
    {
      "epoch": 1.6185376661742983,
      "grad_norm": 0.3303617238998413,
      "learning_rate": 9.215420618302746e-05,
      "loss": 0.2708,
      "step": 4383
    },
    {
      "epoch": 1.6189069423929099,
      "grad_norm": 0.3967146873474121,
      "learning_rate": 9.212957260746398e-05,
      "loss": 0.2847,
      "step": 4384
    },
    {
      "epoch": 1.6192762186115215,
      "grad_norm": 0.2737744152545929,
      "learning_rate": 9.210493903190048e-05,
      "loss": 0.2434,
      "step": 4385
    },
    {
      "epoch": 1.619645494830133,
      "grad_norm": 0.3345085382461548,
      "learning_rate": 9.2080305456337e-05,
      "loss": 0.241,
      "step": 4386
    },
    {
      "epoch": 1.6200147710487445,
      "grad_norm": 0.298921138048172,
      "learning_rate": 9.20556718807735e-05,
      "loss": 0.2408,
      "step": 4387
    },
    {
      "epoch": 1.6203840472673559,
      "grad_norm": 0.2851993441581726,
      "learning_rate": 9.203103830521001e-05,
      "loss": 0.2264,
      "step": 4388
    },
    {
      "epoch": 1.6207533234859675,
      "grad_norm": 0.2866692841053009,
      "learning_rate": 9.200640472964651e-05,
      "loss": 0.1903,
      "step": 4389
    },
    {
      "epoch": 1.621122599704579,
      "grad_norm": 0.27634477615356445,
      "learning_rate": 9.198177115408301e-05,
      "loss": 0.1854,
      "step": 4390
    },
    {
      "epoch": 1.6214918759231907,
      "grad_norm": 0.27173739671707153,
      "learning_rate": 9.195713757851953e-05,
      "loss": 0.271,
      "step": 4391
    },
    {
      "epoch": 1.621861152141802,
      "grad_norm": 0.22114601731300354,
      "learning_rate": 9.193250400295603e-05,
      "loss": 0.1995,
      "step": 4392
    },
    {
      "epoch": 1.6222304283604134,
      "grad_norm": 0.25258347392082214,
      "learning_rate": 9.190787042739254e-05,
      "loss": 0.2447,
      "step": 4393
    },
    {
      "epoch": 1.622599704579025,
      "grad_norm": 0.3033640384674072,
      "learning_rate": 9.188323685182904e-05,
      "loss": 0.265,
      "step": 4394
    },
    {
      "epoch": 1.6229689807976366,
      "grad_norm": 0.2696057856082916,
      "learning_rate": 9.185860327626556e-05,
      "loss": 0.2213,
      "step": 4395
    },
    {
      "epoch": 1.6233382570162482,
      "grad_norm": 0.3131449222564697,
      "learning_rate": 9.183396970070206e-05,
      "loss": 0.2536,
      "step": 4396
    },
    {
      "epoch": 1.6237075332348598,
      "grad_norm": 0.28059932589530945,
      "learning_rate": 9.180933612513856e-05,
      "loss": 0.2064,
      "step": 4397
    },
    {
      "epoch": 1.6240768094534712,
      "grad_norm": 0.26027733087539673,
      "learning_rate": 9.178470254957508e-05,
      "loss": 0.2053,
      "step": 4398
    },
    {
      "epoch": 1.6244460856720826,
      "grad_norm": 0.27992936968803406,
      "learning_rate": 9.176006897401158e-05,
      "loss": 0.2565,
      "step": 4399
    },
    {
      "epoch": 1.6248153618906942,
      "grad_norm": 0.26828402280807495,
      "learning_rate": 9.173543539844809e-05,
      "loss": 0.253,
      "step": 4400
    },
    {
      "epoch": 1.6248153618906942,
      "eval_loss": 0.265991747379303,
      "eval_runtime": 5.8558,
      "eval_samples_per_second": 8.539,
      "eval_steps_per_second": 1.195,
      "step": 4400
    },
    {
      "epoch": 1.6251846381093058,
      "grad_norm": 0.285685271024704,
      "learning_rate": 9.171080182288459e-05,
      "loss": 0.2289,
      "step": 4401
    },
    {
      "epoch": 1.6255539143279174,
      "grad_norm": 0.2752537131309509,
      "learning_rate": 9.168616824732111e-05,
      "loss": 0.2482,
      "step": 4402
    },
    {
      "epoch": 1.6259231905465288,
      "grad_norm": 0.3914795219898224,
      "learning_rate": 9.166153467175761e-05,
      "loss": 0.304,
      "step": 4403
    },
    {
      "epoch": 1.6262924667651402,
      "grad_norm": 0.26765310764312744,
      "learning_rate": 9.163690109619412e-05,
      "loss": 0.2098,
      "step": 4404
    },
    {
      "epoch": 1.6266617429837518,
      "grad_norm": 0.2741362154483795,
      "learning_rate": 9.161226752063062e-05,
      "loss": 0.2248,
      "step": 4405
    },
    {
      "epoch": 1.6270310192023634,
      "grad_norm": 0.30598682165145874,
      "learning_rate": 9.158763394506713e-05,
      "loss": 0.2531,
      "step": 4406
    },
    {
      "epoch": 1.627400295420975,
      "grad_norm": 0.2636612057685852,
      "learning_rate": 9.156300036950364e-05,
      "loss": 0.2289,
      "step": 4407
    },
    {
      "epoch": 1.6277695716395866,
      "grad_norm": 0.283623069524765,
      "learning_rate": 9.153836679394014e-05,
      "loss": 0.2506,
      "step": 4408
    },
    {
      "epoch": 1.628138847858198,
      "grad_norm": 0.2700044512748718,
      "learning_rate": 9.151373321837666e-05,
      "loss": 0.1795,
      "step": 4409
    },
    {
      "epoch": 1.6285081240768093,
      "grad_norm": 0.25703635811805725,
      "learning_rate": 9.148909964281316e-05,
      "loss": 0.2389,
      "step": 4410
    },
    {
      "epoch": 1.628877400295421,
      "grad_norm": 0.28995630145072937,
      "learning_rate": 9.146446606724967e-05,
      "loss": 0.2339,
      "step": 4411
    },
    {
      "epoch": 1.6292466765140325,
      "grad_norm": 0.25523653626441956,
      "learning_rate": 9.143983249168617e-05,
      "loss": 0.2343,
      "step": 4412
    },
    {
      "epoch": 1.6296159527326441,
      "grad_norm": 0.36017218232154846,
      "learning_rate": 9.141519891612267e-05,
      "loss": 0.2713,
      "step": 4413
    },
    {
      "epoch": 1.6299852289512555,
      "grad_norm": 0.27270761132240295,
      "learning_rate": 9.139056534055919e-05,
      "loss": 0.2085,
      "step": 4414
    },
    {
      "epoch": 1.630354505169867,
      "grad_norm": 0.2864871621131897,
      "learning_rate": 9.136593176499569e-05,
      "loss": 0.2321,
      "step": 4415
    },
    {
      "epoch": 1.6307237813884785,
      "grad_norm": 0.25306713581085205,
      "learning_rate": 9.13412981894322e-05,
      "loss": 0.2117,
      "step": 4416
    },
    {
      "epoch": 1.6310930576070901,
      "grad_norm": 0.2895197570323944,
      "learning_rate": 9.13166646138687e-05,
      "loss": 0.2356,
      "step": 4417
    },
    {
      "epoch": 1.6314623338257017,
      "grad_norm": 0.2583214044570923,
      "learning_rate": 9.129203103830522e-05,
      "loss": 0.226,
      "step": 4418
    },
    {
      "epoch": 1.631831610044313,
      "grad_norm": 0.23806144297122955,
      "learning_rate": 9.126739746274172e-05,
      "loss": 0.2325,
      "step": 4419
    },
    {
      "epoch": 1.6322008862629247,
      "grad_norm": 0.2492138296365738,
      "learning_rate": 9.124276388717824e-05,
      "loss": 0.1986,
      "step": 4420
    },
    {
      "epoch": 1.632570162481536,
      "grad_norm": 0.2343548685312271,
      "learning_rate": 9.121813031161474e-05,
      "loss": 0.1947,
      "step": 4421
    },
    {
      "epoch": 1.6329394387001477,
      "grad_norm": 0.25394871830940247,
      "learning_rate": 9.119349673605124e-05,
      "loss": 0.2158,
      "step": 4422
    },
    {
      "epoch": 1.6333087149187593,
      "grad_norm": 0.33500936627388,
      "learning_rate": 9.116886316048775e-05,
      "loss": 0.2538,
      "step": 4423
    },
    {
      "epoch": 1.6336779911373709,
      "grad_norm": 0.2588845193386078,
      "learning_rate": 9.114422958492425e-05,
      "loss": 0.2169,
      "step": 4424
    },
    {
      "epoch": 1.6340472673559823,
      "grad_norm": 0.2695541977882385,
      "learning_rate": 9.111959600936077e-05,
      "loss": 0.2016,
      "step": 4425
    },
    {
      "epoch": 1.6344165435745936,
      "grad_norm": 0.3496728837490082,
      "learning_rate": 9.109496243379727e-05,
      "loss": 0.2643,
      "step": 4426
    },
    {
      "epoch": 1.6347858197932053,
      "grad_norm": 0.2309270054101944,
      "learning_rate": 9.107032885823378e-05,
      "loss": 0.1892,
      "step": 4427
    },
    {
      "epoch": 1.6351550960118169,
      "grad_norm": 0.29163655638694763,
      "learning_rate": 9.104569528267028e-05,
      "loss": 0.2454,
      "step": 4428
    },
    {
      "epoch": 1.6355243722304285,
      "grad_norm": 0.2710795998573303,
      "learning_rate": 9.102106170710679e-05,
      "loss": 0.2136,
      "step": 4429
    },
    {
      "epoch": 1.6358936484490398,
      "grad_norm": 0.2789679765701294,
      "learning_rate": 9.09964281315433e-05,
      "loss": 0.2397,
      "step": 4430
    },
    {
      "epoch": 1.6362629246676514,
      "grad_norm": 0.2854340970516205,
      "learning_rate": 9.09717945559798e-05,
      "loss": 0.231,
      "step": 4431
    },
    {
      "epoch": 1.6366322008862628,
      "grad_norm": 0.24197128415107727,
      "learning_rate": 9.094716098041632e-05,
      "loss": 0.1917,
      "step": 4432
    },
    {
      "epoch": 1.6370014771048744,
      "grad_norm": 0.35419929027557373,
      "learning_rate": 9.092252740485282e-05,
      "loss": 0.2305,
      "step": 4433
    },
    {
      "epoch": 1.637370753323486,
      "grad_norm": 0.28594160079956055,
      "learning_rate": 9.089789382928933e-05,
      "loss": 0.2398,
      "step": 4434
    },
    {
      "epoch": 1.6377400295420976,
      "grad_norm": 0.23706410825252533,
      "learning_rate": 9.087326025372583e-05,
      "loss": 0.2111,
      "step": 4435
    },
    {
      "epoch": 1.638109305760709,
      "grad_norm": 0.3336345851421356,
      "learning_rate": 9.084862667816235e-05,
      "loss": 0.3033,
      "step": 4436
    },
    {
      "epoch": 1.6384785819793204,
      "grad_norm": 0.25109750032424927,
      "learning_rate": 9.082399310259885e-05,
      "loss": 0.2006,
      "step": 4437
    },
    {
      "epoch": 1.638847858197932,
      "grad_norm": 0.247293621301651,
      "learning_rate": 9.079935952703535e-05,
      "loss": 0.224,
      "step": 4438
    },
    {
      "epoch": 1.6392171344165436,
      "grad_norm": 0.2591431140899658,
      "learning_rate": 9.077472595147186e-05,
      "loss": 0.2625,
      "step": 4439
    },
    {
      "epoch": 1.6395864106351552,
      "grad_norm": 0.2373046725988388,
      "learning_rate": 9.075009237590837e-05,
      "loss": 0.2196,
      "step": 4440
    },
    {
      "epoch": 1.6399556868537666,
      "grad_norm": 0.277576208114624,
      "learning_rate": 9.072545880034488e-05,
      "loss": 0.2159,
      "step": 4441
    },
    {
      "epoch": 1.6403249630723782,
      "grad_norm": 0.24450775980949402,
      "learning_rate": 9.070082522478138e-05,
      "loss": 0.2184,
      "step": 4442
    },
    {
      "epoch": 1.6406942392909896,
      "grad_norm": 0.2535002827644348,
      "learning_rate": 9.06761916492179e-05,
      "loss": 0.2281,
      "step": 4443
    },
    {
      "epoch": 1.6410635155096012,
      "grad_norm": 0.3445992171764374,
      "learning_rate": 9.06515580736544e-05,
      "loss": 0.2395,
      "step": 4444
    },
    {
      "epoch": 1.6414327917282128,
      "grad_norm": 0.347196102142334,
      "learning_rate": 9.06269244980909e-05,
      "loss": 0.2574,
      "step": 4445
    },
    {
      "epoch": 1.6418020679468244,
      "grad_norm": 0.2402815967798233,
      "learning_rate": 9.060229092252741e-05,
      "loss": 0.2097,
      "step": 4446
    },
    {
      "epoch": 1.6421713441654358,
      "grad_norm": 0.29701727628707886,
      "learning_rate": 9.057765734696391e-05,
      "loss": 0.2728,
      "step": 4447
    },
    {
      "epoch": 1.6425406203840471,
      "grad_norm": 0.2463408261537552,
      "learning_rate": 9.055302377140043e-05,
      "loss": 0.2204,
      "step": 4448
    },
    {
      "epoch": 1.6429098966026587,
      "grad_norm": 0.2880644202232361,
      "learning_rate": 9.052839019583693e-05,
      "loss": 0.2739,
      "step": 4449
    },
    {
      "epoch": 1.6432791728212703,
      "grad_norm": 0.27345898747444153,
      "learning_rate": 9.050375662027344e-05,
      "loss": 0.2796,
      "step": 4450
    },
    {
      "epoch": 1.6432791728212703,
      "eval_loss": 0.2640303075313568,
      "eval_runtime": 5.8618,
      "eval_samples_per_second": 8.53,
      "eval_steps_per_second": 1.194,
      "step": 4450
    },
    {
      "epoch": 1.643648449039882,
      "grad_norm": 0.25137606263160706,
      "learning_rate": 9.047912304470995e-05,
      "loss": 0.239,
      "step": 4451
    },
    {
      "epoch": 1.6440177252584933,
      "grad_norm": 0.2716238498687744,
      "learning_rate": 9.045448946914645e-05,
      "loss": 0.229,
      "step": 4452
    },
    {
      "epoch": 1.644387001477105,
      "grad_norm": 0.2977883517742157,
      "learning_rate": 9.042985589358296e-05,
      "loss": 0.2709,
      "step": 4453
    },
    {
      "epoch": 1.6447562776957163,
      "grad_norm": 0.2766415774822235,
      "learning_rate": 9.040522231801946e-05,
      "loss": 0.2337,
      "step": 4454
    },
    {
      "epoch": 1.645125553914328,
      "grad_norm": 0.2886631488800049,
      "learning_rate": 9.038058874245598e-05,
      "loss": 0.2359,
      "step": 4455
    },
    {
      "epoch": 1.6454948301329395,
      "grad_norm": 0.4014659523963928,
      "learning_rate": 9.035595516689248e-05,
      "loss": 0.2775,
      "step": 4456
    },
    {
      "epoch": 1.6458641063515511,
      "grad_norm": 0.24665050208568573,
      "learning_rate": 9.033132159132899e-05,
      "loss": 0.2314,
      "step": 4457
    },
    {
      "epoch": 1.6462333825701625,
      "grad_norm": 0.2761084735393524,
      "learning_rate": 9.03066880157655e-05,
      "loss": 0.2267,
      "step": 4458
    },
    {
      "epoch": 1.6466026587887739,
      "grad_norm": 0.25735220313072205,
      "learning_rate": 9.028205444020201e-05,
      "loss": 0.2106,
      "step": 4459
    },
    {
      "epoch": 1.6469719350073855,
      "grad_norm": 0.2363380342721939,
      "learning_rate": 9.025742086463851e-05,
      "loss": 0.207,
      "step": 4460
    },
    {
      "epoch": 1.647341211225997,
      "grad_norm": 0.32384592294692993,
      "learning_rate": 9.023278728907501e-05,
      "loss": 0.28,
      "step": 4461
    },
    {
      "epoch": 1.6477104874446087,
      "grad_norm": 0.2280770242214203,
      "learning_rate": 9.020815371351152e-05,
      "loss": 0.2146,
      "step": 4462
    },
    {
      "epoch": 1.64807976366322,
      "grad_norm": 0.24917244911193848,
      "learning_rate": 9.018352013794803e-05,
      "loss": 0.2427,
      "step": 4463
    },
    {
      "epoch": 1.6484490398818314,
      "grad_norm": 0.2832244634628296,
      "learning_rate": 9.015888656238454e-05,
      "loss": 0.2309,
      "step": 4464
    },
    {
      "epoch": 1.648818316100443,
      "grad_norm": 0.2867893576622009,
      "learning_rate": 9.013425298682104e-05,
      "loss": 0.2156,
      "step": 4465
    },
    {
      "epoch": 1.6491875923190547,
      "grad_norm": 0.30194559693336487,
      "learning_rate": 9.010961941125756e-05,
      "loss": 0.2429,
      "step": 4466
    },
    {
      "epoch": 1.6495568685376663,
      "grad_norm": 0.343932181596756,
      "learning_rate": 9.008498583569406e-05,
      "loss": 0.2461,
      "step": 4467
    },
    {
      "epoch": 1.6499261447562779,
      "grad_norm": 0.2459821254014969,
      "learning_rate": 9.006035226013056e-05,
      "loss": 0.2294,
      "step": 4468
    },
    {
      "epoch": 1.6502954209748892,
      "grad_norm": 0.227996826171875,
      "learning_rate": 9.003571868456707e-05,
      "loss": 0.1928,
      "step": 4469
    },
    {
      "epoch": 1.6506646971935006,
      "grad_norm": 0.25969910621643066,
      "learning_rate": 9.001108510900357e-05,
      "loss": 0.2261,
      "step": 4470
    },
    {
      "epoch": 1.6510339734121122,
      "grad_norm": 0.2653331458568573,
      "learning_rate": 8.998645153344009e-05,
      "loss": 0.2167,
      "step": 4471
    },
    {
      "epoch": 1.6514032496307238,
      "grad_norm": 0.22981014847755432,
      "learning_rate": 8.996181795787659e-05,
      "loss": 0.1892,
      "step": 4472
    },
    {
      "epoch": 1.6517725258493354,
      "grad_norm": 0.2953481674194336,
      "learning_rate": 8.99371843823131e-05,
      "loss": 0.2623,
      "step": 4473
    },
    {
      "epoch": 1.6521418020679468,
      "grad_norm": 0.26547351479530334,
      "learning_rate": 8.99125508067496e-05,
      "loss": 0.2397,
      "step": 4474
    },
    {
      "epoch": 1.6525110782865582,
      "grad_norm": 0.23823410272598267,
      "learning_rate": 8.988791723118612e-05,
      "loss": 0.1963,
      "step": 4475
    },
    {
      "epoch": 1.6528803545051698,
      "grad_norm": 0.25190117955207825,
      "learning_rate": 8.986328365562262e-05,
      "loss": 0.2078,
      "step": 4476
    },
    {
      "epoch": 1.6532496307237814,
      "grad_norm": 0.2754233777523041,
      "learning_rate": 8.983865008005912e-05,
      "loss": 0.2287,
      "step": 4477
    },
    {
      "epoch": 1.653618906942393,
      "grad_norm": 0.267267644405365,
      "learning_rate": 8.981401650449564e-05,
      "loss": 0.2004,
      "step": 4478
    },
    {
      "epoch": 1.6539881831610044,
      "grad_norm": 0.24427461624145508,
      "learning_rate": 8.978938292893214e-05,
      "loss": 0.1887,
      "step": 4479
    },
    {
      "epoch": 1.654357459379616,
      "grad_norm": 0.3076989948749542,
      "learning_rate": 8.976474935336865e-05,
      "loss": 0.2569,
      "step": 4480
    },
    {
      "epoch": 1.6547267355982274,
      "grad_norm": 0.2608012855052948,
      "learning_rate": 8.974011577780515e-05,
      "loss": 0.2135,
      "step": 4481
    },
    {
      "epoch": 1.655096011816839,
      "grad_norm": 0.294429749250412,
      "learning_rate": 8.971548220224167e-05,
      "loss": 0.2327,
      "step": 4482
    },
    {
      "epoch": 1.6554652880354506,
      "grad_norm": 0.30032092332839966,
      "learning_rate": 8.969084862667817e-05,
      "loss": 0.2462,
      "step": 4483
    },
    {
      "epoch": 1.6558345642540622,
      "grad_norm": 0.3220147490501404,
      "learning_rate": 8.966621505111467e-05,
      "loss": 0.2502,
      "step": 4484
    },
    {
      "epoch": 1.6562038404726735,
      "grad_norm": 0.23546169698238373,
      "learning_rate": 8.964158147555119e-05,
      "loss": 0.2258,
      "step": 4485
    },
    {
      "epoch": 1.656573116691285,
      "grad_norm": 0.2772619426250458,
      "learning_rate": 8.961694789998769e-05,
      "loss": 0.2546,
      "step": 4486
    },
    {
      "epoch": 1.6569423929098965,
      "grad_norm": 0.28485119342803955,
      "learning_rate": 8.95923143244242e-05,
      "loss": 0.2253,
      "step": 4487
    },
    {
      "epoch": 1.6573116691285081,
      "grad_norm": 0.2849203050136566,
      "learning_rate": 8.95676807488607e-05,
      "loss": 0.2222,
      "step": 4488
    },
    {
      "epoch": 1.6576809453471197,
      "grad_norm": 0.25608864426612854,
      "learning_rate": 8.954304717329722e-05,
      "loss": 0.211,
      "step": 4489
    },
    {
      "epoch": 1.6580502215657311,
      "grad_norm": 0.2595456838607788,
      "learning_rate": 8.951841359773372e-05,
      "loss": 0.2306,
      "step": 4490
    },
    {
      "epoch": 1.6584194977843427,
      "grad_norm": 0.32323145866394043,
      "learning_rate": 8.949378002217023e-05,
      "loss": 0.239,
      "step": 4491
    },
    {
      "epoch": 1.658788774002954,
      "grad_norm": 0.22644315659999847,
      "learning_rate": 8.946914644660673e-05,
      "loss": 0.1855,
      "step": 4492
    },
    {
      "epoch": 1.6591580502215657,
      "grad_norm": 0.2632284164428711,
      "learning_rate": 8.944451287104323e-05,
      "loss": 0.226,
      "step": 4493
    },
    {
      "epoch": 1.6595273264401773,
      "grad_norm": 0.3604695796966553,
      "learning_rate": 8.941987929547975e-05,
      "loss": 0.2408,
      "step": 4494
    },
    {
      "epoch": 1.659896602658789,
      "grad_norm": 0.2264641374349594,
      "learning_rate": 8.939524571991625e-05,
      "loss": 0.2295,
      "step": 4495
    },
    {
      "epoch": 1.6602658788774003,
      "grad_norm": 0.23985488712787628,
      "learning_rate": 8.937061214435276e-05,
      "loss": 0.2187,
      "step": 4496
    },
    {
      "epoch": 1.6606351550960117,
      "grad_norm": 0.27624571323394775,
      "learning_rate": 8.934597856878927e-05,
      "loss": 0.2037,
      "step": 4497
    },
    {
      "epoch": 1.6610044313146233,
      "grad_norm": 0.36503899097442627,
      "learning_rate": 8.932134499322578e-05,
      "loss": 0.3095,
      "step": 4498
    },
    {
      "epoch": 1.6613737075332349,
      "grad_norm": 0.2893148362636566,
      "learning_rate": 8.929671141766228e-05,
      "loss": 0.2332,
      "step": 4499
    },
    {
      "epoch": 1.6617429837518465,
      "grad_norm": 0.2621174156665802,
      "learning_rate": 8.927207784209878e-05,
      "loss": 0.2218,
      "step": 4500
    },
    {
      "epoch": 1.6617429837518465,
      "eval_loss": 0.25753548741340637,
      "eval_runtime": 5.8668,
      "eval_samples_per_second": 8.522,
      "eval_steps_per_second": 1.193,
      "step": 4500
    },
    {
      "epoch": 1.6621122599704579,
      "grad_norm": 0.25062716007232666,
      "learning_rate": 8.92474442665353e-05,
      "loss": 0.2219,
      "step": 4501
    },
    {
      "epoch": 1.6624815361890695,
      "grad_norm": 0.3061050772666931,
      "learning_rate": 8.92228106909718e-05,
      "loss": 0.2455,
      "step": 4502
    },
    {
      "epoch": 1.6628508124076808,
      "grad_norm": 0.272899329662323,
      "learning_rate": 8.919817711540831e-05,
      "loss": 0.2413,
      "step": 4503
    },
    {
      "epoch": 1.6632200886262924,
      "grad_norm": 0.4094938337802887,
      "learning_rate": 8.917354353984481e-05,
      "loss": 0.2343,
      "step": 4504
    },
    {
      "epoch": 1.663589364844904,
      "grad_norm": 0.25752800703048706,
      "learning_rate": 8.914890996428133e-05,
      "loss": 0.2151,
      "step": 4505
    },
    {
      "epoch": 1.6639586410635157,
      "grad_norm": 0.29961711168289185,
      "learning_rate": 8.912427638871783e-05,
      "loss": 0.2309,
      "step": 4506
    },
    {
      "epoch": 1.664327917282127,
      "grad_norm": 0.31224143505096436,
      "learning_rate": 8.909964281315434e-05,
      "loss": 0.2556,
      "step": 4507
    },
    {
      "epoch": 1.6646971935007384,
      "grad_norm": 0.2943941354751587,
      "learning_rate": 8.907500923759085e-05,
      "loss": 0.2823,
      "step": 4508
    },
    {
      "epoch": 1.66506646971935,
      "grad_norm": 0.22451508045196533,
      "learning_rate": 8.905037566202735e-05,
      "loss": 0.1987,
      "step": 4509
    },
    {
      "epoch": 1.6654357459379616,
      "grad_norm": 0.30652108788490295,
      "learning_rate": 8.902574208646386e-05,
      "loss": 0.2419,
      "step": 4510
    },
    {
      "epoch": 1.6658050221565732,
      "grad_norm": 0.2775302827358246,
      "learning_rate": 8.900110851090036e-05,
      "loss": 0.2633,
      "step": 4511
    },
    {
      "epoch": 1.6661742983751846,
      "grad_norm": 0.2944474518299103,
      "learning_rate": 8.897647493533688e-05,
      "loss": 0.2385,
      "step": 4512
    },
    {
      "epoch": 1.6665435745937962,
      "grad_norm": 0.24264825880527496,
      "learning_rate": 8.895184135977338e-05,
      "loss": 0.2241,
      "step": 4513
    },
    {
      "epoch": 1.6669128508124076,
      "grad_norm": 0.24153397977352142,
      "learning_rate": 8.892720778420989e-05,
      "loss": 0.2132,
      "step": 4514
    },
    {
      "epoch": 1.6672821270310192,
      "grad_norm": 0.27261120080947876,
      "learning_rate": 8.89025742086464e-05,
      "loss": 0.1873,
      "step": 4515
    },
    {
      "epoch": 1.6676514032496308,
      "grad_norm": 0.4182679057121277,
      "learning_rate": 8.88779406330829e-05,
      "loss": 0.2904,
      "step": 4516
    },
    {
      "epoch": 1.6680206794682424,
      "grad_norm": 0.3107384443283081,
      "learning_rate": 8.885330705751941e-05,
      "loss": 0.2524,
      "step": 4517
    },
    {
      "epoch": 1.6683899556868538,
      "grad_norm": 0.2381308525800705,
      "learning_rate": 8.882867348195591e-05,
      "loss": 0.2298,
      "step": 4518
    },
    {
      "epoch": 1.6687592319054652,
      "grad_norm": 0.30288082361221313,
      "learning_rate": 8.880403990639243e-05,
      "loss": 0.2311,
      "step": 4519
    },
    {
      "epoch": 1.6691285081240768,
      "grad_norm": 0.2832752466201782,
      "learning_rate": 8.877940633082893e-05,
      "loss": 0.224,
      "step": 4520
    },
    {
      "epoch": 1.6694977843426884,
      "grad_norm": 0.24044345319271088,
      "learning_rate": 8.875477275526544e-05,
      "loss": 0.2119,
      "step": 4521
    },
    {
      "epoch": 1.6698670605613,
      "grad_norm": 0.24596406519412994,
      "learning_rate": 8.873013917970194e-05,
      "loss": 0.2175,
      "step": 4522
    },
    {
      "epoch": 1.6702363367799113,
      "grad_norm": 0.2973885238170624,
      "learning_rate": 8.870550560413846e-05,
      "loss": 0.2842,
      "step": 4523
    },
    {
      "epoch": 1.670605612998523,
      "grad_norm": 0.29346567392349243,
      "learning_rate": 8.868087202857496e-05,
      "loss": 0.2474,
      "step": 4524
    },
    {
      "epoch": 1.6709748892171343,
      "grad_norm": 0.26802217960357666,
      "learning_rate": 8.865623845301146e-05,
      "loss": 0.2219,
      "step": 4525
    },
    {
      "epoch": 1.671344165435746,
      "grad_norm": 0.2719583809375763,
      "learning_rate": 8.863160487744797e-05,
      "loss": 0.2357,
      "step": 4526
    },
    {
      "epoch": 1.6717134416543575,
      "grad_norm": 0.2853793203830719,
      "learning_rate": 8.860697130188447e-05,
      "loss": 0.238,
      "step": 4527
    },
    {
      "epoch": 1.6720827178729691,
      "grad_norm": 0.3272760808467865,
      "learning_rate": 8.858233772632099e-05,
      "loss": 0.2365,
      "step": 4528
    },
    {
      "epoch": 1.6724519940915805,
      "grad_norm": 0.23719562590122223,
      "learning_rate": 8.855770415075748e-05,
      "loss": 0.2154,
      "step": 4529
    },
    {
      "epoch": 1.672821270310192,
      "grad_norm": 0.30899596214294434,
      "learning_rate": 8.853307057519399e-05,
      "loss": 0.2706,
      "step": 4530
    },
    {
      "epoch": 1.6731905465288035,
      "grad_norm": 0.2901187241077423,
      "learning_rate": 8.850843699963049e-05,
      "loss": 0.2165,
      "step": 4531
    },
    {
      "epoch": 1.673559822747415,
      "grad_norm": 0.21402893960475922,
      "learning_rate": 8.848380342406701e-05,
      "loss": 0.1978,
      "step": 4532
    },
    {
      "epoch": 1.6739290989660267,
      "grad_norm": 0.23306670784950256,
      "learning_rate": 8.845916984850351e-05,
      "loss": 0.2095,
      "step": 4533
    },
    {
      "epoch": 1.674298375184638,
      "grad_norm": 0.3484254777431488,
      "learning_rate": 8.843453627294001e-05,
      "loss": 0.2216,
      "step": 4534
    },
    {
      "epoch": 1.6746676514032495,
      "grad_norm": 0.26495423913002014,
      "learning_rate": 8.840990269737652e-05,
      "loss": 0.1954,
      "step": 4535
    },
    {
      "epoch": 1.675036927621861,
      "grad_norm": 0.2986883819103241,
      "learning_rate": 8.838526912181303e-05,
      "loss": 0.2434,
      "step": 4536
    },
    {
      "epoch": 1.6754062038404727,
      "grad_norm": 0.35357728600502014,
      "learning_rate": 8.836063554624954e-05,
      "loss": 0.2083,
      "step": 4537
    },
    {
      "epoch": 1.6757754800590843,
      "grad_norm": 0.2808247208595276,
      "learning_rate": 8.833600197068604e-05,
      "loss": 0.2356,
      "step": 4538
    },
    {
      "epoch": 1.6761447562776959,
      "grad_norm": 0.30437755584716797,
      "learning_rate": 8.831136839512256e-05,
      "loss": 0.2718,
      "step": 4539
    },
    {
      "epoch": 1.6765140324963073,
      "grad_norm": 0.25901874899864197,
      "learning_rate": 8.828673481955906e-05,
      "loss": 0.1908,
      "step": 4540
    },
    {
      "epoch": 1.6768833087149186,
      "grad_norm": 0.22702591121196747,
      "learning_rate": 8.826210124399557e-05,
      "loss": 0.1957,
      "step": 4541
    },
    {
      "epoch": 1.6772525849335302,
      "grad_norm": 0.32408803701400757,
      "learning_rate": 8.823746766843207e-05,
      "loss": 0.2264,
      "step": 4542
    },
    {
      "epoch": 1.6776218611521418,
      "grad_norm": 0.2785623371601105,
      "learning_rate": 8.821283409286857e-05,
      "loss": 0.2275,
      "step": 4543
    },
    {
      "epoch": 1.6779911373707534,
      "grad_norm": 0.2758251428604126,
      "learning_rate": 8.818820051730509e-05,
      "loss": 0.2208,
      "step": 4544
    },
    {
      "epoch": 1.6783604135893648,
      "grad_norm": 0.25355419516563416,
      "learning_rate": 8.816356694174159e-05,
      "loss": 0.2387,
      "step": 4545
    },
    {
      "epoch": 1.6787296898079762,
      "grad_norm": 0.24101129174232483,
      "learning_rate": 8.81389333661781e-05,
      "loss": 0.1959,
      "step": 4546
    },
    {
      "epoch": 1.6790989660265878,
      "grad_norm": 0.27683645486831665,
      "learning_rate": 8.81142997906146e-05,
      "loss": 0.2197,
      "step": 4547
    },
    {
      "epoch": 1.6794682422451994,
      "grad_norm": 0.25528275966644287,
      "learning_rate": 8.808966621505112e-05,
      "loss": 0.209,
      "step": 4548
    },
    {
      "epoch": 1.679837518463811,
      "grad_norm": 0.2559058368206024,
      "learning_rate": 8.806503263948762e-05,
      "loss": 0.1847,
      "step": 4549
    },
    {
      "epoch": 1.6802067946824224,
      "grad_norm": 0.30595335364341736,
      "learning_rate": 8.804039906392412e-05,
      "loss": 0.2645,
      "step": 4550
    },
    {
      "epoch": 1.6802067946824224,
      "eval_loss": 0.2636949419975281,
      "eval_runtime": 5.8582,
      "eval_samples_per_second": 8.535,
      "eval_steps_per_second": 1.195,
      "step": 4550
    },
    {
      "epoch": 1.680576070901034,
      "grad_norm": 0.27721020579338074,
      "learning_rate": 8.801576548836064e-05,
      "loss": 0.2319,
      "step": 4551
    },
    {
      "epoch": 1.6809453471196454,
      "grad_norm": 0.2904880940914154,
      "learning_rate": 8.799113191279714e-05,
      "loss": 0.2272,
      "step": 4552
    },
    {
      "epoch": 1.681314623338257,
      "grad_norm": 0.293811172246933,
      "learning_rate": 8.796649833723365e-05,
      "loss": 0.2521,
      "step": 4553
    },
    {
      "epoch": 1.6816838995568686,
      "grad_norm": 0.26758673787117004,
      "learning_rate": 8.794186476167015e-05,
      "loss": 0.2028,
      "step": 4554
    },
    {
      "epoch": 1.6820531757754802,
      "grad_norm": 0.27245649695396423,
      "learning_rate": 8.791723118610667e-05,
      "loss": 0.2471,
      "step": 4555
    },
    {
      "epoch": 1.6824224519940916,
      "grad_norm": 0.27987295389175415,
      "learning_rate": 8.789259761054317e-05,
      "loss": 0.2391,
      "step": 4556
    },
    {
      "epoch": 1.682791728212703,
      "grad_norm": 0.2732281982898712,
      "learning_rate": 8.786796403497968e-05,
      "loss": 0.268,
      "step": 4557
    },
    {
      "epoch": 1.6831610044313146,
      "grad_norm": 0.2969675362110138,
      "learning_rate": 8.784333045941618e-05,
      "loss": 0.2581,
      "step": 4558
    },
    {
      "epoch": 1.6835302806499262,
      "grad_norm": 0.41811317205429077,
      "learning_rate": 8.781869688385269e-05,
      "loss": 0.3046,
      "step": 4559
    },
    {
      "epoch": 1.6838995568685378,
      "grad_norm": 0.2814512848854065,
      "learning_rate": 8.77940633082892e-05,
      "loss": 0.2348,
      "step": 4560
    },
    {
      "epoch": 1.6842688330871491,
      "grad_norm": 0.2987380027770996,
      "learning_rate": 8.77694297327257e-05,
      "loss": 0.232,
      "step": 4561
    },
    {
      "epoch": 1.6846381093057607,
      "grad_norm": 0.3873238265514374,
      "learning_rate": 8.774479615716222e-05,
      "loss": 0.2471,
      "step": 4562
    },
    {
      "epoch": 1.6850073855243721,
      "grad_norm": 0.28666067123413086,
      "learning_rate": 8.772016258159872e-05,
      "loss": 0.2145,
      "step": 4563
    },
    {
      "epoch": 1.6853766617429837,
      "grad_norm": 0.28261733055114746,
      "learning_rate": 8.769552900603523e-05,
      "loss": 0.2161,
      "step": 4564
    },
    {
      "epoch": 1.6857459379615953,
      "grad_norm": 0.29818618297576904,
      "learning_rate": 8.767089543047173e-05,
      "loss": 0.2175,
      "step": 4565
    },
    {
      "epoch": 1.686115214180207,
      "grad_norm": 0.265331506729126,
      "learning_rate": 8.764626185490823e-05,
      "loss": 0.1898,
      "step": 4566
    },
    {
      "epoch": 1.6864844903988183,
      "grad_norm": 0.24167734384536743,
      "learning_rate": 8.762162827934475e-05,
      "loss": 0.2373,
      "step": 4567
    },
    {
      "epoch": 1.6868537666174297,
      "grad_norm": 0.2740474045276642,
      "learning_rate": 8.759699470378125e-05,
      "loss": 0.2291,
      "step": 4568
    },
    {
      "epoch": 1.6872230428360413,
      "grad_norm": 0.2038453072309494,
      "learning_rate": 8.757236112821776e-05,
      "loss": 0.1733,
      "step": 4569
    },
    {
      "epoch": 1.687592319054653,
      "grad_norm": 0.2524508535861969,
      "learning_rate": 8.754772755265427e-05,
      "loss": 0.2104,
      "step": 4570
    },
    {
      "epoch": 1.6879615952732645,
      "grad_norm": 0.24824324250221252,
      "learning_rate": 8.752309397709078e-05,
      "loss": 0.2092,
      "step": 4571
    },
    {
      "epoch": 1.6883308714918759,
      "grad_norm": 0.2843952476978302,
      "learning_rate": 8.749846040152728e-05,
      "loss": 0.2103,
      "step": 4572
    },
    {
      "epoch": 1.6887001477104875,
      "grad_norm": 0.2717718482017517,
      "learning_rate": 8.74738268259638e-05,
      "loss": 0.2165,
      "step": 4573
    },
    {
      "epoch": 1.6890694239290989,
      "grad_norm": 0.2635425627231598,
      "learning_rate": 8.74491932504003e-05,
      "loss": 0.1931,
      "step": 4574
    },
    {
      "epoch": 1.6894387001477105,
      "grad_norm": 0.23467527329921722,
      "learning_rate": 8.74245596748368e-05,
      "loss": 0.2161,
      "step": 4575
    },
    {
      "epoch": 1.689807976366322,
      "grad_norm": 0.25482845306396484,
      "learning_rate": 8.739992609927331e-05,
      "loss": 0.2205,
      "step": 4576
    },
    {
      "epoch": 1.6901772525849337,
      "grad_norm": 0.2853875160217285,
      "learning_rate": 8.737529252370981e-05,
      "loss": 0.2433,
      "step": 4577
    },
    {
      "epoch": 1.690546528803545,
      "grad_norm": 0.24550633132457733,
      "learning_rate": 8.735065894814633e-05,
      "loss": 0.2229,
      "step": 4578
    },
    {
      "epoch": 1.6909158050221564,
      "grad_norm": 0.2933661937713623,
      "learning_rate": 8.732602537258283e-05,
      "loss": 0.245,
      "step": 4579
    },
    {
      "epoch": 1.691285081240768,
      "grad_norm": 0.28975528478622437,
      "learning_rate": 8.730139179701934e-05,
      "loss": 0.2349,
      "step": 4580
    },
    {
      "epoch": 1.6916543574593796,
      "grad_norm": 0.23025333881378174,
      "learning_rate": 8.727675822145585e-05,
      "loss": 0.185,
      "step": 4581
    },
    {
      "epoch": 1.6920236336779912,
      "grad_norm": 0.30284446477890015,
      "learning_rate": 8.725212464589235e-05,
      "loss": 0.2706,
      "step": 4582
    },
    {
      "epoch": 1.6923929098966026,
      "grad_norm": 0.2338089495897293,
      "learning_rate": 8.722749107032886e-05,
      "loss": 0.2031,
      "step": 4583
    },
    {
      "epoch": 1.6927621861152142,
      "grad_norm": 0.3378913700580597,
      "learning_rate": 8.720285749476536e-05,
      "loss": 0.2438,
      "step": 4584
    },
    {
      "epoch": 1.6931314623338256,
      "grad_norm": 0.2701866328716278,
      "learning_rate": 8.717822391920188e-05,
      "loss": 0.232,
      "step": 4585
    },
    {
      "epoch": 1.6935007385524372,
      "grad_norm": 0.2801609933376312,
      "learning_rate": 8.715359034363838e-05,
      "loss": 0.236,
      "step": 4586
    },
    {
      "epoch": 1.6938700147710488,
      "grad_norm": 0.21978351473808289,
      "learning_rate": 8.712895676807489e-05,
      "loss": 0.19,
      "step": 4587
    },
    {
      "epoch": 1.6942392909896604,
      "grad_norm": 0.3093269169330597,
      "learning_rate": 8.71043231925114e-05,
      "loss": 0.2268,
      "step": 4588
    },
    {
      "epoch": 1.6946085672082718,
      "grad_norm": 0.31482091546058655,
      "learning_rate": 8.707968961694791e-05,
      "loss": 0.2468,
      "step": 4589
    },
    {
      "epoch": 1.6949778434268832,
      "grad_norm": 0.27576708793640137,
      "learning_rate": 8.705505604138441e-05,
      "loss": 0.2307,
      "step": 4590
    },
    {
      "epoch": 1.6953471196454948,
      "grad_norm": 0.2709692716598511,
      "learning_rate": 8.703042246582091e-05,
      "loss": 0.2458,
      "step": 4591
    },
    {
      "epoch": 1.6957163958641064,
      "grad_norm": 0.21820217370986938,
      "learning_rate": 8.700578889025742e-05,
      "loss": 0.2004,
      "step": 4592
    },
    {
      "epoch": 1.696085672082718,
      "grad_norm": 0.27680477499961853,
      "learning_rate": 8.698115531469393e-05,
      "loss": 0.2415,
      "step": 4593
    },
    {
      "epoch": 1.6964549483013294,
      "grad_norm": 0.29179415106773376,
      "learning_rate": 8.695652173913044e-05,
      "loss": 0.2502,
      "step": 4594
    },
    {
      "epoch": 1.696824224519941,
      "grad_norm": 0.24993900954723358,
      "learning_rate": 8.693188816356694e-05,
      "loss": 0.2044,
      "step": 4595
    },
    {
      "epoch": 1.6971935007385524,
      "grad_norm": 0.2431059628725052,
      "learning_rate": 8.690725458800346e-05,
      "loss": 0.2139,
      "step": 4596
    },
    {
      "epoch": 1.697562776957164,
      "grad_norm": 0.26724016666412354,
      "learning_rate": 8.688262101243996e-05,
      "loss": 0.22,
      "step": 4597
    },
    {
      "epoch": 1.6979320531757756,
      "grad_norm": 0.33457979559898376,
      "learning_rate": 8.685798743687646e-05,
      "loss": 0.2887,
      "step": 4598
    },
    {
      "epoch": 1.6983013293943872,
      "grad_norm": 0.40689313411712646,
      "learning_rate": 8.683335386131297e-05,
      "loss": 0.2475,
      "step": 4599
    },
    {
      "epoch": 1.6986706056129985,
      "grad_norm": 0.2402809113264084,
      "learning_rate": 8.680872028574947e-05,
      "loss": 0.1933,
      "step": 4600
    },
    {
      "epoch": 1.6986706056129985,
      "eval_loss": 0.26016414165496826,
      "eval_runtime": 5.8648,
      "eval_samples_per_second": 8.525,
      "eval_steps_per_second": 1.194,
      "step": 4600
    },
    {
      "epoch": 1.69903988183161,
      "grad_norm": 0.2964528799057007,
      "learning_rate": 8.678408671018599e-05,
      "loss": 0.2453,
      "step": 4601
    },
    {
      "epoch": 1.6994091580502215,
      "grad_norm": 0.28239455819129944,
      "learning_rate": 8.675945313462249e-05,
      "loss": 0.2347,
      "step": 4602
    },
    {
      "epoch": 1.6997784342688331,
      "grad_norm": 0.24995984137058258,
      "learning_rate": 8.6734819559059e-05,
      "loss": 0.2098,
      "step": 4603
    },
    {
      "epoch": 1.7001477104874447,
      "grad_norm": 0.3133336901664734,
      "learning_rate": 8.67101859834955e-05,
      "loss": 0.2285,
      "step": 4604
    },
    {
      "epoch": 1.700516986706056,
      "grad_norm": 0.2739347517490387,
      "learning_rate": 8.668555240793201e-05,
      "loss": 0.2346,
      "step": 4605
    },
    {
      "epoch": 1.7008862629246675,
      "grad_norm": 0.3014254570007324,
      "learning_rate": 8.666091883236852e-05,
      "loss": 0.2354,
      "step": 4606
    },
    {
      "epoch": 1.701255539143279,
      "grad_norm": 0.26933860778808594,
      "learning_rate": 8.663628525680502e-05,
      "loss": 0.2298,
      "step": 4607
    },
    {
      "epoch": 1.7016248153618907,
      "grad_norm": 0.2875133156776428,
      "learning_rate": 8.661165168124154e-05,
      "loss": 0.2197,
      "step": 4608
    },
    {
      "epoch": 1.7019940915805023,
      "grad_norm": 0.26846441626548767,
      "learning_rate": 8.658701810567804e-05,
      "loss": 0.2189,
      "step": 4609
    },
    {
      "epoch": 1.702363367799114,
      "grad_norm": 0.2907378673553467,
      "learning_rate": 8.656238453011455e-05,
      "loss": 0.226,
      "step": 4610
    },
    {
      "epoch": 1.7027326440177253,
      "grad_norm": 0.3114635646343231,
      "learning_rate": 8.653775095455105e-05,
      "loss": 0.2558,
      "step": 4611
    },
    {
      "epoch": 1.7031019202363367,
      "grad_norm": 0.2575693726539612,
      "learning_rate": 8.651311737898757e-05,
      "loss": 0.2317,
      "step": 4612
    },
    {
      "epoch": 1.7034711964549483,
      "grad_norm": 0.24758966267108917,
      "learning_rate": 8.648848380342407e-05,
      "loss": 0.1889,
      "step": 4613
    },
    {
      "epoch": 1.7038404726735599,
      "grad_norm": 0.28485026955604553,
      "learning_rate": 8.646385022786057e-05,
      "loss": 0.2215,
      "step": 4614
    },
    {
      "epoch": 1.7042097488921715,
      "grad_norm": 0.2999398410320282,
      "learning_rate": 8.643921665229709e-05,
      "loss": 0.2394,
      "step": 4615
    },
    {
      "epoch": 1.7045790251107829,
      "grad_norm": 0.2679917514324188,
      "learning_rate": 8.641458307673359e-05,
      "loss": 0.2249,
      "step": 4616
    },
    {
      "epoch": 1.7049483013293942,
      "grad_norm": 0.24768604338169098,
      "learning_rate": 8.63899495011701e-05,
      "loss": 0.2063,
      "step": 4617
    },
    {
      "epoch": 1.7053175775480058,
      "grad_norm": 0.31003034114837646,
      "learning_rate": 8.63653159256066e-05,
      "loss": 0.2638,
      "step": 4618
    },
    {
      "epoch": 1.7056868537666174,
      "grad_norm": 0.2762015163898468,
      "learning_rate": 8.634068235004312e-05,
      "loss": 0.2248,
      "step": 4619
    },
    {
      "epoch": 1.706056129985229,
      "grad_norm": 0.28417354822158813,
      "learning_rate": 8.631604877447962e-05,
      "loss": 0.2207,
      "step": 4620
    },
    {
      "epoch": 1.7064254062038404,
      "grad_norm": 0.24697527289390564,
      "learning_rate": 8.629141519891612e-05,
      "loss": 0.1839,
      "step": 4621
    },
    {
      "epoch": 1.706794682422452,
      "grad_norm": 0.239962637424469,
      "learning_rate": 8.626678162335263e-05,
      "loss": 0.1851,
      "step": 4622
    },
    {
      "epoch": 1.7071639586410634,
      "grad_norm": 0.2700730860233307,
      "learning_rate": 8.624214804778913e-05,
      "loss": 0.2132,
      "step": 4623
    },
    {
      "epoch": 1.707533234859675,
      "grad_norm": 0.3187514543533325,
      "learning_rate": 8.621751447222565e-05,
      "loss": 0.241,
      "step": 4624
    },
    {
      "epoch": 1.7079025110782866,
      "grad_norm": 0.33720922470092773,
      "learning_rate": 8.619288089666215e-05,
      "loss": 0.2846,
      "step": 4625
    },
    {
      "epoch": 1.7082717872968982,
      "grad_norm": 0.28173840045928955,
      "learning_rate": 8.616824732109867e-05,
      "loss": 0.1962,
      "step": 4626
    },
    {
      "epoch": 1.7086410635155096,
      "grad_norm": 0.24108169972896576,
      "learning_rate": 8.614361374553517e-05,
      "loss": 0.2244,
      "step": 4627
    },
    {
      "epoch": 1.709010339734121,
      "grad_norm": 0.2797034978866577,
      "learning_rate": 8.611898016997168e-05,
      "loss": 0.2327,
      "step": 4628
    },
    {
      "epoch": 1.7093796159527326,
      "grad_norm": 0.22712896764278412,
      "learning_rate": 8.609434659440818e-05,
      "loss": 0.2033,
      "step": 4629
    },
    {
      "epoch": 1.7097488921713442,
      "grad_norm": 0.22765718400478363,
      "learning_rate": 8.606971301884468e-05,
      "loss": 0.2111,
      "step": 4630
    },
    {
      "epoch": 1.7101181683899558,
      "grad_norm": 0.24808034300804138,
      "learning_rate": 8.60450794432812e-05,
      "loss": 0.231,
      "step": 4631
    },
    {
      "epoch": 1.7104874446085672,
      "grad_norm": 0.3694831132888794,
      "learning_rate": 8.60204458677177e-05,
      "loss": 0.2653,
      "step": 4632
    },
    {
      "epoch": 1.7108567208271788,
      "grad_norm": 0.2156008630990982,
      "learning_rate": 8.599581229215421e-05,
      "loss": 0.1874,
      "step": 4633
    },
    {
      "epoch": 1.7112259970457901,
      "grad_norm": 0.285347580909729,
      "learning_rate": 8.597117871659071e-05,
      "loss": 0.2487,
      "step": 4634
    },
    {
      "epoch": 1.7115952732644018,
      "grad_norm": 0.3789399564266205,
      "learning_rate": 8.594654514102723e-05,
      "loss": 0.2251,
      "step": 4635
    },
    {
      "epoch": 1.7119645494830134,
      "grad_norm": 0.3379298746585846,
      "learning_rate": 8.592191156546373e-05,
      "loss": 0.2731,
      "step": 4636
    },
    {
      "epoch": 1.712333825701625,
      "grad_norm": 0.22673995792865753,
      "learning_rate": 8.589727798990023e-05,
      "loss": 0.198,
      "step": 4637
    },
    {
      "epoch": 1.7127031019202363,
      "grad_norm": 0.393801748752594,
      "learning_rate": 8.587264441433675e-05,
      "loss": 0.2793,
      "step": 4638
    },
    {
      "epoch": 1.7130723781388477,
      "grad_norm": 0.27242693305015564,
      "learning_rate": 8.584801083877325e-05,
      "loss": 0.2253,
      "step": 4639
    },
    {
      "epoch": 1.7134416543574593,
      "grad_norm": 0.2874417304992676,
      "learning_rate": 8.582337726320976e-05,
      "loss": 0.2258,
      "step": 4640
    },
    {
      "epoch": 1.713810930576071,
      "grad_norm": 0.2719419002532959,
      "learning_rate": 8.579874368764626e-05,
      "loss": 0.2354,
      "step": 4641
    },
    {
      "epoch": 1.7141802067946825,
      "grad_norm": 0.28539299964904785,
      "learning_rate": 8.577411011208278e-05,
      "loss": 0.2446,
      "step": 4642
    },
    {
      "epoch": 1.714549483013294,
      "grad_norm": 0.2207421213388443,
      "learning_rate": 8.574947653651928e-05,
      "loss": 0.2098,
      "step": 4643
    },
    {
      "epoch": 1.7149187592319055,
      "grad_norm": 0.2763689458370209,
      "learning_rate": 8.57248429609558e-05,
      "loss": 0.1912,
      "step": 4644
    },
    {
      "epoch": 1.715288035450517,
      "grad_norm": 0.28716400265693665,
      "learning_rate": 8.57002093853923e-05,
      "loss": 0.2179,
      "step": 4645
    },
    {
      "epoch": 1.7156573116691285,
      "grad_norm": 0.28598442673683167,
      "learning_rate": 8.56755758098288e-05,
      "loss": 0.2423,
      "step": 4646
    },
    {
      "epoch": 1.71602658788774,
      "grad_norm": 0.2622353434562683,
      "learning_rate": 8.565094223426531e-05,
      "loss": 0.2281,
      "step": 4647
    },
    {
      "epoch": 1.7163958641063517,
      "grad_norm": 0.3016565144062042,
      "learning_rate": 8.562630865870181e-05,
      "loss": 0.2634,
      "step": 4648
    },
    {
      "epoch": 1.716765140324963,
      "grad_norm": 0.3286326825618744,
      "learning_rate": 8.560167508313833e-05,
      "loss": 0.2699,
      "step": 4649
    },
    {
      "epoch": 1.7171344165435745,
      "grad_norm": 0.2957834005355835,
      "learning_rate": 8.557704150757483e-05,
      "loss": 0.205,
      "step": 4650
    },
    {
      "epoch": 1.7171344165435745,
      "eval_loss": 0.2559199631214142,
      "eval_runtime": 5.8598,
      "eval_samples_per_second": 8.533,
      "eval_steps_per_second": 1.195,
      "step": 4650
    },
    {
      "epoch": 1.717503692762186,
      "grad_norm": 0.24889697134494781,
      "learning_rate": 8.555240793201134e-05,
      "loss": 0.196,
      "step": 4651
    },
    {
      "epoch": 1.7178729689807977,
      "grad_norm": 0.2743191421031952,
      "learning_rate": 8.552777435644784e-05,
      "loss": 0.2563,
      "step": 4652
    },
    {
      "epoch": 1.7182422451994093,
      "grad_norm": 0.27960363030433655,
      "learning_rate": 8.550314078088434e-05,
      "loss": 0.2355,
      "step": 4653
    },
    {
      "epoch": 1.7186115214180206,
      "grad_norm": 0.30967599153518677,
      "learning_rate": 8.547850720532086e-05,
      "loss": 0.2272,
      "step": 4654
    },
    {
      "epoch": 1.7189807976366323,
      "grad_norm": 0.2617909908294678,
      "learning_rate": 8.545387362975736e-05,
      "loss": 0.1914,
      "step": 4655
    },
    {
      "epoch": 1.7193500738552436,
      "grad_norm": 0.2783837914466858,
      "learning_rate": 8.542924005419387e-05,
      "loss": 0.2323,
      "step": 4656
    },
    {
      "epoch": 1.7197193500738552,
      "grad_norm": 0.39419353008270264,
      "learning_rate": 8.540460647863038e-05,
      "loss": 0.2028,
      "step": 4657
    },
    {
      "epoch": 1.7200886262924668,
      "grad_norm": 0.28697970509529114,
      "learning_rate": 8.537997290306689e-05,
      "loss": 0.2222,
      "step": 4658
    },
    {
      "epoch": 1.7204579025110784,
      "grad_norm": 0.29508844017982483,
      "learning_rate": 8.535533932750339e-05,
      "loss": 0.2427,
      "step": 4659
    },
    {
      "epoch": 1.7208271787296898,
      "grad_norm": 0.22784453630447388,
      "learning_rate": 8.53307057519399e-05,
      "loss": 0.1948,
      "step": 4660
    },
    {
      "epoch": 1.7211964549483012,
      "grad_norm": 0.26522296667099,
      "learning_rate": 8.53060721763764e-05,
      "loss": 0.2431,
      "step": 4661
    },
    {
      "epoch": 1.7215657311669128,
      "grad_norm": 0.3083113431930542,
      "learning_rate": 8.528143860081291e-05,
      "loss": 0.231,
      "step": 4662
    },
    {
      "epoch": 1.7219350073855244,
      "grad_norm": 0.2493920624256134,
      "learning_rate": 8.525680502524942e-05,
      "loss": 0.2212,
      "step": 4663
    },
    {
      "epoch": 1.722304283604136,
      "grad_norm": 0.24538254737854004,
      "learning_rate": 8.523217144968592e-05,
      "loss": 0.2517,
      "step": 4664
    },
    {
      "epoch": 1.7226735598227474,
      "grad_norm": 0.2629144489765167,
      "learning_rate": 8.520753787412244e-05,
      "loss": 0.2157,
      "step": 4665
    },
    {
      "epoch": 1.7230428360413588,
      "grad_norm": 0.22240932285785675,
      "learning_rate": 8.518290429855894e-05,
      "loss": 0.2052,
      "step": 4666
    },
    {
      "epoch": 1.7234121122599704,
      "grad_norm": 0.9502313137054443,
      "learning_rate": 8.515827072299545e-05,
      "loss": 0.2784,
      "step": 4667
    },
    {
      "epoch": 1.723781388478582,
      "grad_norm": 0.27637672424316406,
      "learning_rate": 8.513363714743195e-05,
      "loss": 0.2267,
      "step": 4668
    },
    {
      "epoch": 1.7241506646971936,
      "grad_norm": 0.307858943939209,
      "learning_rate": 8.510900357186846e-05,
      "loss": 0.2254,
      "step": 4669
    },
    {
      "epoch": 1.7245199409158052,
      "grad_norm": 0.31493571400642395,
      "learning_rate": 8.508436999630497e-05,
      "loss": 0.2913,
      "step": 4670
    },
    {
      "epoch": 1.7248892171344166,
      "grad_norm": 0.28521549701690674,
      "learning_rate": 8.505973642074147e-05,
      "loss": 0.2528,
      "step": 4671
    },
    {
      "epoch": 1.725258493353028,
      "grad_norm": 0.3285808265209198,
      "learning_rate": 8.503510284517799e-05,
      "loss": 0.2496,
      "step": 4672
    },
    {
      "epoch": 1.7256277695716395,
      "grad_norm": 0.27094313502311707,
      "learning_rate": 8.501046926961449e-05,
      "loss": 0.2302,
      "step": 4673
    },
    {
      "epoch": 1.7259970457902511,
      "grad_norm": 0.3215968906879425,
      "learning_rate": 8.4985835694051e-05,
      "loss": 0.245,
      "step": 4674
    },
    {
      "epoch": 1.7263663220088628,
      "grad_norm": 0.2997489273548126,
      "learning_rate": 8.49612021184875e-05,
      "loss": 0.2155,
      "step": 4675
    },
    {
      "epoch": 1.7267355982274741,
      "grad_norm": 0.3190780580043793,
      "learning_rate": 8.493656854292402e-05,
      "loss": 0.3006,
      "step": 4676
    },
    {
      "epoch": 1.7271048744460855,
      "grad_norm": 0.23564372956752777,
      "learning_rate": 8.491193496736052e-05,
      "loss": 0.2057,
      "step": 4677
    },
    {
      "epoch": 1.7274741506646971,
      "grad_norm": 0.27775612473487854,
      "learning_rate": 8.488730139179702e-05,
      "loss": 0.2498,
      "step": 4678
    },
    {
      "epoch": 1.7278434268833087,
      "grad_norm": 0.40059298276901245,
      "learning_rate": 8.486266781623353e-05,
      "loss": 0.3065,
      "step": 4679
    },
    {
      "epoch": 1.7282127031019203,
      "grad_norm": 0.25466418266296387,
      "learning_rate": 8.483803424067004e-05,
      "loss": 0.1927,
      "step": 4680
    },
    {
      "epoch": 1.7285819793205317,
      "grad_norm": 0.300536572933197,
      "learning_rate": 8.481340066510655e-05,
      "loss": 0.2383,
      "step": 4681
    },
    {
      "epoch": 1.7289512555391433,
      "grad_norm": 0.234557643532753,
      "learning_rate": 8.478876708954305e-05,
      "loss": 0.2091,
      "step": 4682
    },
    {
      "epoch": 1.7293205317577547,
      "grad_norm": 0.24355682730674744,
      "learning_rate": 8.476413351397957e-05,
      "loss": 0.2011,
      "step": 4683
    },
    {
      "epoch": 1.7296898079763663,
      "grad_norm": 0.28211480379104614,
      "learning_rate": 8.473949993841607e-05,
      "loss": 0.2605,
      "step": 4684
    },
    {
      "epoch": 1.730059084194978,
      "grad_norm": 0.27587154507637024,
      "learning_rate": 8.471486636285257e-05,
      "loss": 0.196,
      "step": 4685
    },
    {
      "epoch": 1.7304283604135895,
      "grad_norm": 0.2767505943775177,
      "learning_rate": 8.469023278728908e-05,
      "loss": 0.2479,
      "step": 4686
    },
    {
      "epoch": 1.7307976366322009,
      "grad_norm": 0.27924713492393494,
      "learning_rate": 8.466559921172558e-05,
      "loss": 0.226,
      "step": 4687
    },
    {
      "epoch": 1.7311669128508123,
      "grad_norm": 0.27785757184028625,
      "learning_rate": 8.46409656361621e-05,
      "loss": 0.2573,
      "step": 4688
    },
    {
      "epoch": 1.7315361890694239,
      "grad_norm": 0.28567370772361755,
      "learning_rate": 8.46163320605986e-05,
      "loss": 0.2321,
      "step": 4689
    },
    {
      "epoch": 1.7319054652880355,
      "grad_norm": 0.25874269008636475,
      "learning_rate": 8.459169848503511e-05,
      "loss": 0.2262,
      "step": 4690
    },
    {
      "epoch": 1.732274741506647,
      "grad_norm": 0.2584947645664215,
      "learning_rate": 8.456706490947162e-05,
      "loss": 0.2258,
      "step": 4691
    },
    {
      "epoch": 1.7326440177252584,
      "grad_norm": 0.29869017004966736,
      "learning_rate": 8.454243133390812e-05,
      "loss": 0.2681,
      "step": 4692
    },
    {
      "epoch": 1.73301329394387,
      "grad_norm": 0.27775976061820984,
      "learning_rate": 8.451779775834463e-05,
      "loss": 0.226,
      "step": 4693
    },
    {
      "epoch": 1.7333825701624814,
      "grad_norm": 0.2534462511539459,
      "learning_rate": 8.449316418278113e-05,
      "loss": 0.2036,
      "step": 4694
    },
    {
      "epoch": 1.733751846381093,
      "grad_norm": 0.2533627152442932,
      "learning_rate": 8.446853060721765e-05,
      "loss": 0.1978,
      "step": 4695
    },
    {
      "epoch": 1.7341211225997046,
      "grad_norm": 0.2667185366153717,
      "learning_rate": 8.444389703165415e-05,
      "loss": 0.2061,
      "step": 4696
    },
    {
      "epoch": 1.7344903988183162,
      "grad_norm": 0.23923826217651367,
      "learning_rate": 8.441926345609066e-05,
      "loss": 0.2153,
      "step": 4697
    },
    {
      "epoch": 1.7348596750369276,
      "grad_norm": 0.2720220386981964,
      "learning_rate": 8.439462988052716e-05,
      "loss": 0.1984,
      "step": 4698
    },
    {
      "epoch": 1.735228951255539,
      "grad_norm": 0.2901582717895508,
      "learning_rate": 8.436999630496368e-05,
      "loss": 0.285,
      "step": 4699
    },
    {
      "epoch": 1.7355982274741506,
      "grad_norm": 0.22563205659389496,
      "learning_rate": 8.434536272940018e-05,
      "loss": 0.1913,
      "step": 4700
    },
    {
      "epoch": 1.7355982274741506,
      "eval_loss": 0.2577421963214874,
      "eval_runtime": 5.8614,
      "eval_samples_per_second": 8.53,
      "eval_steps_per_second": 1.194,
      "step": 4700
    },
    {
      "epoch": 1.7359675036927622,
      "grad_norm": 0.28509747982025146,
      "learning_rate": 8.432072915383668e-05,
      "loss": 0.2285,
      "step": 4701
    },
    {
      "epoch": 1.7363367799113738,
      "grad_norm": 0.3507232367992401,
      "learning_rate": 8.42960955782732e-05,
      "loss": 0.2764,
      "step": 4702
    },
    {
      "epoch": 1.7367060561299852,
      "grad_norm": 0.28448420763015747,
      "learning_rate": 8.42714620027097e-05,
      "loss": 0.2002,
      "step": 4703
    },
    {
      "epoch": 1.7370753323485968,
      "grad_norm": 0.28723815083503723,
      "learning_rate": 8.424682842714621e-05,
      "loss": 0.193,
      "step": 4704
    },
    {
      "epoch": 1.7374446085672082,
      "grad_norm": 0.29733386635780334,
      "learning_rate": 8.422219485158271e-05,
      "loss": 0.2499,
      "step": 4705
    },
    {
      "epoch": 1.7378138847858198,
      "grad_norm": 0.27715975046157837,
      "learning_rate": 8.419756127601923e-05,
      "loss": 0.2344,
      "step": 4706
    },
    {
      "epoch": 1.7381831610044314,
      "grad_norm": 0.2789750397205353,
      "learning_rate": 8.417292770045573e-05,
      "loss": 0.2556,
      "step": 4707
    },
    {
      "epoch": 1.738552437223043,
      "grad_norm": 0.27307644486427307,
      "learning_rate": 8.414829412489223e-05,
      "loss": 0.2221,
      "step": 4708
    },
    {
      "epoch": 1.7389217134416544,
      "grad_norm": 0.288707435131073,
      "learning_rate": 8.412366054932874e-05,
      "loss": 0.2176,
      "step": 4709
    },
    {
      "epoch": 1.7392909896602657,
      "grad_norm": 0.3327498733997345,
      "learning_rate": 8.409902697376524e-05,
      "loss": 0.2652,
      "step": 4710
    },
    {
      "epoch": 1.7396602658788773,
      "grad_norm": 0.405509352684021,
      "learning_rate": 8.407439339820176e-05,
      "loss": 0.2141,
      "step": 4711
    },
    {
      "epoch": 1.740029542097489,
      "grad_norm": 0.29631343483924866,
      "learning_rate": 8.404975982263826e-05,
      "loss": 0.2259,
      "step": 4712
    },
    {
      "epoch": 1.7403988183161005,
      "grad_norm": 0.2549144923686981,
      "learning_rate": 8.402512624707477e-05,
      "loss": 0.2226,
      "step": 4713
    },
    {
      "epoch": 1.740768094534712,
      "grad_norm": 0.30277979373931885,
      "learning_rate": 8.400049267151128e-05,
      "loss": 0.226,
      "step": 4714
    },
    {
      "epoch": 1.7411373707533235,
      "grad_norm": 0.24072663486003876,
      "learning_rate": 8.397585909594779e-05,
      "loss": 0.2008,
      "step": 4715
    },
    {
      "epoch": 1.741506646971935,
      "grad_norm": 0.3189060688018799,
      "learning_rate": 8.395122552038429e-05,
      "loss": 0.2356,
      "step": 4716
    },
    {
      "epoch": 1.7418759231905465,
      "grad_norm": 0.2404140830039978,
      "learning_rate": 8.392659194482079e-05,
      "loss": 0.2212,
      "step": 4717
    },
    {
      "epoch": 1.7422451994091581,
      "grad_norm": 0.29535868763923645,
      "learning_rate": 8.390195836925731e-05,
      "loss": 0.2348,
      "step": 4718
    },
    {
      "epoch": 1.7426144756277697,
      "grad_norm": 0.27296799421310425,
      "learning_rate": 8.387732479369381e-05,
      "loss": 0.2101,
      "step": 4719
    },
    {
      "epoch": 1.742983751846381,
      "grad_norm": 0.2823295593261719,
      "learning_rate": 8.385269121813032e-05,
      "loss": 0.216,
      "step": 4720
    },
    {
      "epoch": 1.7433530280649925,
      "grad_norm": 0.3427070081233978,
      "learning_rate": 8.382805764256682e-05,
      "loss": 0.2699,
      "step": 4721
    },
    {
      "epoch": 1.743722304283604,
      "grad_norm": 0.26912540197372437,
      "learning_rate": 8.380342406700334e-05,
      "loss": 0.2049,
      "step": 4722
    },
    {
      "epoch": 1.7440915805022157,
      "grad_norm": 0.30528247356414795,
      "learning_rate": 8.377879049143984e-05,
      "loss": 0.2585,
      "step": 4723
    },
    {
      "epoch": 1.7444608567208273,
      "grad_norm": 0.2788606882095337,
      "learning_rate": 8.375415691587634e-05,
      "loss": 0.2297,
      "step": 4724
    },
    {
      "epoch": 1.7448301329394387,
      "grad_norm": 0.3505759537220001,
      "learning_rate": 8.372952334031286e-05,
      "loss": 0.2437,
      "step": 4725
    },
    {
      "epoch": 1.7451994091580503,
      "grad_norm": 0.2676331400871277,
      "learning_rate": 8.370488976474936e-05,
      "loss": 0.1927,
      "step": 4726
    },
    {
      "epoch": 1.7455686853766617,
      "grad_norm": 0.4100620746612549,
      "learning_rate": 8.368025618918587e-05,
      "loss": 0.3176,
      "step": 4727
    },
    {
      "epoch": 1.7459379615952733,
      "grad_norm": 0.25829556584358215,
      "learning_rate": 8.365562261362237e-05,
      "loss": 0.212,
      "step": 4728
    },
    {
      "epoch": 1.7463072378138849,
      "grad_norm": 0.23964823782444,
      "learning_rate": 8.363098903805889e-05,
      "loss": 0.2282,
      "step": 4729
    },
    {
      "epoch": 1.7466765140324965,
      "grad_norm": 0.2864070236682892,
      "learning_rate": 8.360635546249539e-05,
      "loss": 0.2392,
      "step": 4730
    },
    {
      "epoch": 1.7470457902511078,
      "grad_norm": 0.2946716547012329,
      "learning_rate": 8.35817218869319e-05,
      "loss": 0.2233,
      "step": 4731
    },
    {
      "epoch": 1.7474150664697192,
      "grad_norm": 0.27674975991249084,
      "learning_rate": 8.35570883113684e-05,
      "loss": 0.2156,
      "step": 4732
    },
    {
      "epoch": 1.7477843426883308,
      "grad_norm": 0.2801598608493805,
      "learning_rate": 8.35324547358049e-05,
      "loss": 0.2279,
      "step": 4733
    },
    {
      "epoch": 1.7481536189069424,
      "grad_norm": 0.25461244583129883,
      "learning_rate": 8.350782116024142e-05,
      "loss": 0.2174,
      "step": 4734
    },
    {
      "epoch": 1.748522895125554,
      "grad_norm": 0.29004809260368347,
      "learning_rate": 8.348318758467792e-05,
      "loss": 0.259,
      "step": 4735
    },
    {
      "epoch": 1.7488921713441654,
      "grad_norm": 0.37906742095947266,
      "learning_rate": 8.345855400911444e-05,
      "loss": 0.2609,
      "step": 4736
    },
    {
      "epoch": 1.7492614475627768,
      "grad_norm": 0.2857353389263153,
      "learning_rate": 8.343392043355094e-05,
      "loss": 0.2181,
      "step": 4737
    },
    {
      "epoch": 1.7496307237813884,
      "grad_norm": 0.2577114999294281,
      "learning_rate": 8.340928685798745e-05,
      "loss": 0.2202,
      "step": 4738
    },
    {
      "epoch": 1.75,
      "grad_norm": 0.36852386593818665,
      "learning_rate": 8.338465328242395e-05,
      "loss": 0.2747,
      "step": 4739
    },
    {
      "epoch": 1.7503692762186116,
      "grad_norm": 0.26849859952926636,
      "learning_rate": 8.336001970686045e-05,
      "loss": 0.2054,
      "step": 4740
    },
    {
      "epoch": 1.7507385524372232,
      "grad_norm": 0.24560749530792236,
      "learning_rate": 8.333538613129697e-05,
      "loss": 0.1925,
      "step": 4741
    },
    {
      "epoch": 1.7511078286558346,
      "grad_norm": 0.2803412973880768,
      "learning_rate": 8.331075255573347e-05,
      "loss": 0.2511,
      "step": 4742
    },
    {
      "epoch": 1.751477104874446,
      "grad_norm": 0.24951079487800598,
      "learning_rate": 8.328611898016998e-05,
      "loss": 0.2099,
      "step": 4743
    },
    {
      "epoch": 1.7518463810930576,
      "grad_norm": 0.2854885160923004,
      "learning_rate": 8.326148540460648e-05,
      "loss": 0.2514,
      "step": 4744
    },
    {
      "epoch": 1.7522156573116692,
      "grad_norm": 0.33116501569747925,
      "learning_rate": 8.3236851829043e-05,
      "loss": 0.2803,
      "step": 4745
    },
    {
      "epoch": 1.7525849335302808,
      "grad_norm": 0.23747555911540985,
      "learning_rate": 8.32122182534795e-05,
      "loss": 0.1952,
      "step": 4746
    },
    {
      "epoch": 1.7529542097488922,
      "grad_norm": 0.2971426248550415,
      "learning_rate": 8.318758467791602e-05,
      "loss": 0.2456,
      "step": 4747
    },
    {
      "epoch": 1.7533234859675035,
      "grad_norm": 0.2350413203239441,
      "learning_rate": 8.316295110235252e-05,
      "loss": 0.1831,
      "step": 4748
    },
    {
      "epoch": 1.7536927621861151,
      "grad_norm": 0.2742297947406769,
      "learning_rate": 8.313831752678902e-05,
      "loss": 0.2131,
      "step": 4749
    },
    {
      "epoch": 1.7540620384047267,
      "grad_norm": 0.2849682867527008,
      "learning_rate": 8.311368395122553e-05,
      "loss": 0.23,
      "step": 4750
    },
    {
      "epoch": 1.7540620384047267,
      "eval_loss": 0.25629734992980957,
      "eval_runtime": 5.8662,
      "eval_samples_per_second": 8.523,
      "eval_steps_per_second": 1.193,
      "step": 4750
    },
    {
      "epoch": 1.7544313146233383,
      "grad_norm": 0.2834409475326538,
      "learning_rate": 8.308905037566203e-05,
      "loss": 0.2206,
      "step": 4751
    },
    {
      "epoch": 1.7548005908419497,
      "grad_norm": 0.24136458337306976,
      "learning_rate": 8.306441680009855e-05,
      "loss": 0.2136,
      "step": 4752
    },
    {
      "epoch": 1.7551698670605613,
      "grad_norm": 0.28627488017082214,
      "learning_rate": 8.303978322453505e-05,
      "loss": 0.2252,
      "step": 4753
    },
    {
      "epoch": 1.7555391432791727,
      "grad_norm": 0.27557557821273804,
      "learning_rate": 8.301514964897156e-05,
      "loss": 0.2256,
      "step": 4754
    },
    {
      "epoch": 1.7559084194977843,
      "grad_norm": 0.28202036023139954,
      "learning_rate": 8.299051607340806e-05,
      "loss": 0.2478,
      "step": 4755
    },
    {
      "epoch": 1.756277695716396,
      "grad_norm": 0.2877001464366913,
      "learning_rate": 8.296588249784457e-05,
      "loss": 0.2426,
      "step": 4756
    },
    {
      "epoch": 1.7566469719350075,
      "grad_norm": 0.2511405646800995,
      "learning_rate": 8.294124892228108e-05,
      "loss": 0.2104,
      "step": 4757
    },
    {
      "epoch": 1.757016248153619,
      "grad_norm": 0.24227432906627655,
      "learning_rate": 8.291661534671758e-05,
      "loss": 0.2121,
      "step": 4758
    },
    {
      "epoch": 1.7573855243722303,
      "grad_norm": 0.2546199560165405,
      "learning_rate": 8.28919817711541e-05,
      "loss": 0.2281,
      "step": 4759
    },
    {
      "epoch": 1.7577548005908419,
      "grad_norm": 0.31248635053634644,
      "learning_rate": 8.28673481955906e-05,
      "loss": 0.2274,
      "step": 4760
    },
    {
      "epoch": 1.7581240768094535,
      "grad_norm": 0.2701592743396759,
      "learning_rate": 8.28427146200271e-05,
      "loss": 0.21,
      "step": 4761
    },
    {
      "epoch": 1.758493353028065,
      "grad_norm": 0.29244470596313477,
      "learning_rate": 8.28180810444636e-05,
      "loss": 0.2072,
      "step": 4762
    },
    {
      "epoch": 1.7588626292466765,
      "grad_norm": 0.25586065649986267,
      "learning_rate": 8.279344746890011e-05,
      "loss": 0.2153,
      "step": 4763
    },
    {
      "epoch": 1.759231905465288,
      "grad_norm": 0.3217445909976959,
      "learning_rate": 8.276881389333661e-05,
      "loss": 0.2646,
      "step": 4764
    },
    {
      "epoch": 1.7596011816838995,
      "grad_norm": 0.32226285338401794,
      "learning_rate": 8.274418031777313e-05,
      "loss": 0.2796,
      "step": 4765
    },
    {
      "epoch": 1.759970457902511,
      "grad_norm": 0.3295985758304596,
      "learning_rate": 8.271954674220963e-05,
      "loss": 0.2184,
      "step": 4766
    },
    {
      "epoch": 1.7603397341211227,
      "grad_norm": 0.28168031573295593,
      "learning_rate": 8.269491316664613e-05,
      "loss": 0.249,
      "step": 4767
    },
    {
      "epoch": 1.7607090103397343,
      "grad_norm": 0.2556081712245941,
      "learning_rate": 8.267027959108265e-05,
      "loss": 0.2415,
      "step": 4768
    },
    {
      "epoch": 1.7610782865583456,
      "grad_norm": 0.2822915315628052,
      "learning_rate": 8.264564601551915e-05,
      "loss": 0.2393,
      "step": 4769
    },
    {
      "epoch": 1.761447562776957,
      "grad_norm": 0.2232300341129303,
      "learning_rate": 8.262101243995566e-05,
      "loss": 0.1948,
      "step": 4770
    },
    {
      "epoch": 1.7618168389955686,
      "grad_norm": 0.2830291986465454,
      "learning_rate": 8.259637886439216e-05,
      "loss": 0.2162,
      "step": 4771
    },
    {
      "epoch": 1.7621861152141802,
      "grad_norm": 0.2080407440662384,
      "learning_rate": 8.257174528882868e-05,
      "loss": 0.1822,
      "step": 4772
    },
    {
      "epoch": 1.7625553914327918,
      "grad_norm": 0.27873337268829346,
      "learning_rate": 8.254711171326518e-05,
      "loss": 0.2165,
      "step": 4773
    },
    {
      "epoch": 1.7629246676514032,
      "grad_norm": 0.22472889721393585,
      "learning_rate": 8.252247813770168e-05,
      "loss": 0.2033,
      "step": 4774
    },
    {
      "epoch": 1.7632939438700148,
      "grad_norm": 0.24323783814907074,
      "learning_rate": 8.24978445621382e-05,
      "loss": 0.1833,
      "step": 4775
    },
    {
      "epoch": 1.7636632200886262,
      "grad_norm": 0.344291090965271,
      "learning_rate": 8.24732109865747e-05,
      "loss": 0.2771,
      "step": 4776
    },
    {
      "epoch": 1.7640324963072378,
      "grad_norm": 0.23993299901485443,
      "learning_rate": 8.244857741101121e-05,
      "loss": 0.2151,
      "step": 4777
    },
    {
      "epoch": 1.7644017725258494,
      "grad_norm": 0.2538295090198517,
      "learning_rate": 8.242394383544771e-05,
      "loss": 0.193,
      "step": 4778
    },
    {
      "epoch": 1.764771048744461,
      "grad_norm": 0.24399816989898682,
      "learning_rate": 8.239931025988423e-05,
      "loss": 0.2047,
      "step": 4779
    },
    {
      "epoch": 1.7651403249630724,
      "grad_norm": 0.27396318316459656,
      "learning_rate": 8.237467668432073e-05,
      "loss": 0.2298,
      "step": 4780
    },
    {
      "epoch": 1.7655096011816838,
      "grad_norm": 0.2602836489677429,
      "learning_rate": 8.235004310875724e-05,
      "loss": 0.2292,
      "step": 4781
    },
    {
      "epoch": 1.7658788774002954,
      "grad_norm": 0.3122258186340332,
      "learning_rate": 8.232540953319374e-05,
      "loss": 0.2347,
      "step": 4782
    },
    {
      "epoch": 1.766248153618907,
      "grad_norm": 0.21484318375587463,
      "learning_rate": 8.230077595763024e-05,
      "loss": 0.1888,
      "step": 4783
    },
    {
      "epoch": 1.7666174298375186,
      "grad_norm": 0.3026772439479828,
      "learning_rate": 8.227614238206676e-05,
      "loss": 0.2152,
      "step": 4784
    },
    {
      "epoch": 1.76698670605613,
      "grad_norm": 0.2766391634941101,
      "learning_rate": 8.225150880650326e-05,
      "loss": 0.2407,
      "step": 4785
    },
    {
      "epoch": 1.7673559822747416,
      "grad_norm": 0.3706916868686676,
      "learning_rate": 8.222687523093977e-05,
      "loss": 0.2933,
      "step": 4786
    },
    {
      "epoch": 1.767725258493353,
      "grad_norm": 0.2903095483779907,
      "learning_rate": 8.220224165537628e-05,
      "loss": 0.2039,
      "step": 4787
    },
    {
      "epoch": 1.7680945347119645,
      "grad_norm": 0.2527204155921936,
      "learning_rate": 8.217760807981279e-05,
      "loss": 0.206,
      "step": 4788
    },
    {
      "epoch": 1.7684638109305761,
      "grad_norm": 0.2885925769805908,
      "learning_rate": 8.215297450424929e-05,
      "loss": 0.2627,
      "step": 4789
    },
    {
      "epoch": 1.7688330871491877,
      "grad_norm": 0.2507830262184143,
      "learning_rate": 8.212834092868579e-05,
      "loss": 0.2128,
      "step": 4790
    },
    {
      "epoch": 1.7692023633677991,
      "grad_norm": 0.28362154960632324,
      "learning_rate": 8.210370735312231e-05,
      "loss": 0.2192,
      "step": 4791
    },
    {
      "epoch": 1.7695716395864105,
      "grad_norm": 0.2576613426208496,
      "learning_rate": 8.207907377755881e-05,
      "loss": 0.1932,
      "step": 4792
    },
    {
      "epoch": 1.769940915805022,
      "grad_norm": 0.27201154828071594,
      "learning_rate": 8.205444020199532e-05,
      "loss": 0.243,
      "step": 4793
    },
    {
      "epoch": 1.7703101920236337,
      "grad_norm": 0.24626173079013824,
      "learning_rate": 8.202980662643182e-05,
      "loss": 0.2322,
      "step": 4794
    },
    {
      "epoch": 1.7706794682422453,
      "grad_norm": 0.24108372628688812,
      "learning_rate": 8.200517305086834e-05,
      "loss": 0.2444,
      "step": 4795
    },
    {
      "epoch": 1.7710487444608567,
      "grad_norm": 0.3414941430091858,
      "learning_rate": 8.198053947530484e-05,
      "loss": 0.2722,
      "step": 4796
    },
    {
      "epoch": 1.7714180206794683,
      "grad_norm": 0.3452490270137787,
      "learning_rate": 8.195590589974135e-05,
      "loss": 0.2412,
      "step": 4797
    },
    {
      "epoch": 1.7717872968980797,
      "grad_norm": 0.3017958104610443,
      "learning_rate": 8.193127232417786e-05,
      "loss": 0.2278,
      "step": 4798
    },
    {
      "epoch": 1.7721565731166913,
      "grad_norm": 0.25969305634498596,
      "learning_rate": 8.190663874861436e-05,
      "loss": 0.2016,
      "step": 4799
    },
    {
      "epoch": 1.7725258493353029,
      "grad_norm": 0.24949228763580322,
      "learning_rate": 8.188200517305087e-05,
      "loss": 0.2216,
      "step": 4800
    },
    {
      "epoch": 1.7725258493353029,
      "eval_loss": 0.2572707235813141,
      "eval_runtime": 5.8665,
      "eval_samples_per_second": 8.523,
      "eval_steps_per_second": 1.193,
      "step": 4800
    },
    {
      "epoch": 1.7728951255539145,
      "grad_norm": 0.3111516833305359,
      "learning_rate": 8.185737159748737e-05,
      "loss": 0.2475,
      "step": 4801
    },
    {
      "epoch": 1.7732644017725259,
      "grad_norm": 0.26070913672447205,
      "learning_rate": 8.183273802192389e-05,
      "loss": 0.2392,
      "step": 4802
    },
    {
      "epoch": 1.7736336779911372,
      "grad_norm": 0.29763057827949524,
      "learning_rate": 8.180810444636039e-05,
      "loss": 0.2547,
      "step": 4803
    },
    {
      "epoch": 1.7740029542097489,
      "grad_norm": 0.342492938041687,
      "learning_rate": 8.17834708707969e-05,
      "loss": 0.2694,
      "step": 4804
    },
    {
      "epoch": 1.7743722304283605,
      "grad_norm": 0.34890803694725037,
      "learning_rate": 8.17588372952334e-05,
      "loss": 0.2875,
      "step": 4805
    },
    {
      "epoch": 1.774741506646972,
      "grad_norm": 0.2868111729621887,
      "learning_rate": 8.17342037196699e-05,
      "loss": 0.2002,
      "step": 4806
    },
    {
      "epoch": 1.7751107828655834,
      "grad_norm": 0.2373673915863037,
      "learning_rate": 8.170957014410642e-05,
      "loss": 0.2005,
      "step": 4807
    },
    {
      "epoch": 1.7754800590841948,
      "grad_norm": 0.30628499388694763,
      "learning_rate": 8.168493656854292e-05,
      "loss": 0.225,
      "step": 4808
    },
    {
      "epoch": 1.7758493353028064,
      "grad_norm": 0.2583736181259155,
      "learning_rate": 8.166030299297943e-05,
      "loss": 0.2286,
      "step": 4809
    },
    {
      "epoch": 1.776218611521418,
      "grad_norm": 0.2954586148262024,
      "learning_rate": 8.163566941741594e-05,
      "loss": 0.279,
      "step": 4810
    },
    {
      "epoch": 1.7765878877400296,
      "grad_norm": 0.23172208666801453,
      "learning_rate": 8.161103584185245e-05,
      "loss": 0.2113,
      "step": 4811
    },
    {
      "epoch": 1.7769571639586412,
      "grad_norm": 0.28336572647094727,
      "learning_rate": 8.158640226628895e-05,
      "loss": 0.2553,
      "step": 4812
    },
    {
      "epoch": 1.7773264401772526,
      "grad_norm": 0.3208393454551697,
      "learning_rate": 8.156176869072547e-05,
      "loss": 0.2283,
      "step": 4813
    },
    {
      "epoch": 1.777695716395864,
      "grad_norm": 0.30786389112472534,
      "learning_rate": 8.153713511516197e-05,
      "loss": 0.2594,
      "step": 4814
    },
    {
      "epoch": 1.7780649926144756,
      "grad_norm": 0.30445629358291626,
      "learning_rate": 8.151250153959847e-05,
      "loss": 0.276,
      "step": 4815
    },
    {
      "epoch": 1.7784342688330872,
      "grad_norm": 0.3171381652355194,
      "learning_rate": 8.148786796403498e-05,
      "loss": 0.2435,
      "step": 4816
    },
    {
      "epoch": 1.7788035450516988,
      "grad_norm": 0.36137646436691284,
      "learning_rate": 8.146323438847148e-05,
      "loss": 0.334,
      "step": 4817
    },
    {
      "epoch": 1.7791728212703102,
      "grad_norm": 0.3493507504463196,
      "learning_rate": 8.1438600812908e-05,
      "loss": 0.2908,
      "step": 4818
    },
    {
      "epoch": 1.7795420974889216,
      "grad_norm": 0.2983262836933136,
      "learning_rate": 8.14139672373445e-05,
      "loss": 0.2556,
      "step": 4819
    },
    {
      "epoch": 1.7799113737075332,
      "grad_norm": 0.23181068897247314,
      "learning_rate": 8.138933366178101e-05,
      "loss": 0.2151,
      "step": 4820
    },
    {
      "epoch": 1.7802806499261448,
      "grad_norm": 0.3134463429450989,
      "learning_rate": 8.136470008621752e-05,
      "loss": 0.247,
      "step": 4821
    },
    {
      "epoch": 1.7806499261447564,
      "grad_norm": 0.22138331830501556,
      "learning_rate": 8.134006651065402e-05,
      "loss": 0.1989,
      "step": 4822
    },
    {
      "epoch": 1.7810192023633677,
      "grad_norm": 0.30223992466926575,
      "learning_rate": 8.131543293509053e-05,
      "loss": 0.2792,
      "step": 4823
    },
    {
      "epoch": 1.7813884785819794,
      "grad_norm": 0.22743350267410278,
      "learning_rate": 8.129079935952703e-05,
      "loss": 0.2085,
      "step": 4824
    },
    {
      "epoch": 1.7817577548005907,
      "grad_norm": 0.23054760694503784,
      "learning_rate": 8.126616578396355e-05,
      "loss": 0.1987,
      "step": 4825
    },
    {
      "epoch": 1.7821270310192023,
      "grad_norm": 0.2348661571741104,
      "learning_rate": 8.124153220840005e-05,
      "loss": 0.2121,
      "step": 4826
    },
    {
      "epoch": 1.782496307237814,
      "grad_norm": 0.27090921998023987,
      "learning_rate": 8.121689863283656e-05,
      "loss": 0.2425,
      "step": 4827
    },
    {
      "epoch": 1.7828655834564255,
      "grad_norm": 0.27171093225479126,
      "learning_rate": 8.119226505727306e-05,
      "loss": 0.2258,
      "step": 4828
    },
    {
      "epoch": 1.783234859675037,
      "grad_norm": 0.29861265420913696,
      "learning_rate": 8.116763148170956e-05,
      "loss": 0.222,
      "step": 4829
    },
    {
      "epoch": 1.7836041358936483,
      "grad_norm": 0.3010806441307068,
      "learning_rate": 8.114299790614608e-05,
      "loss": 0.2617,
      "step": 4830
    },
    {
      "epoch": 1.78397341211226,
      "grad_norm": 0.30580946803092957,
      "learning_rate": 8.111836433058258e-05,
      "loss": 0.2289,
      "step": 4831
    },
    {
      "epoch": 1.7843426883308715,
      "grad_norm": 0.29021722078323364,
      "learning_rate": 8.10937307550191e-05,
      "loss": 0.2407,
      "step": 4832
    },
    {
      "epoch": 1.784711964549483,
      "grad_norm": 0.2731410264968872,
      "learning_rate": 8.10690971794556e-05,
      "loss": 0.2624,
      "step": 4833
    },
    {
      "epoch": 1.7850812407680945,
      "grad_norm": 0.24605174362659454,
      "learning_rate": 8.104446360389211e-05,
      "loss": 0.1958,
      "step": 4834
    },
    {
      "epoch": 1.785450516986706,
      "grad_norm": 0.22555018961429596,
      "learning_rate": 8.101983002832861e-05,
      "loss": 0.2166,
      "step": 4835
    },
    {
      "epoch": 1.7858197932053175,
      "grad_norm": 0.27406737208366394,
      "learning_rate": 8.099519645276513e-05,
      "loss": 0.2206,
      "step": 4836
    },
    {
      "epoch": 1.786189069423929,
      "grad_norm": 0.35636645555496216,
      "learning_rate": 8.097056287720163e-05,
      "loss": 0.208,
      "step": 4837
    },
    {
      "epoch": 1.7865583456425407,
      "grad_norm": 0.25280651450157166,
      "learning_rate": 8.094592930163813e-05,
      "loss": 0.2125,
      "step": 4838
    },
    {
      "epoch": 1.7869276218611523,
      "grad_norm": 0.27054616808891296,
      "learning_rate": 8.092129572607464e-05,
      "loss": 0.2327,
      "step": 4839
    },
    {
      "epoch": 1.7872968980797637,
      "grad_norm": 0.2454398274421692,
      "learning_rate": 8.089666215051114e-05,
      "loss": 0.2025,
      "step": 4840
    },
    {
      "epoch": 1.787666174298375,
      "grad_norm": 0.24820207059383392,
      "learning_rate": 8.087202857494766e-05,
      "loss": 0.1859,
      "step": 4841
    },
    {
      "epoch": 1.7880354505169866,
      "grad_norm": 0.2680160403251648,
      "learning_rate": 8.084739499938416e-05,
      "loss": 0.1961,
      "step": 4842
    },
    {
      "epoch": 1.7884047267355982,
      "grad_norm": 0.27619653940200806,
      "learning_rate": 8.082276142382068e-05,
      "loss": 0.2179,
      "step": 4843
    },
    {
      "epoch": 1.7887740029542099,
      "grad_norm": 0.2535490393638611,
      "learning_rate": 8.079812784825718e-05,
      "loss": 0.2169,
      "step": 4844
    },
    {
      "epoch": 1.7891432791728212,
      "grad_norm": 0.27932682633399963,
      "learning_rate": 8.077349427269368e-05,
      "loss": 0.2326,
      "step": 4845
    },
    {
      "epoch": 1.7895125553914328,
      "grad_norm": 0.2859097421169281,
      "learning_rate": 8.074886069713019e-05,
      "loss": 0.2111,
      "step": 4846
    },
    {
      "epoch": 1.7898818316100442,
      "grad_norm": 0.3570568263530731,
      "learning_rate": 8.072422712156669e-05,
      "loss": 0.268,
      "step": 4847
    },
    {
      "epoch": 1.7902511078286558,
      "grad_norm": 0.2741071879863739,
      "learning_rate": 8.069959354600321e-05,
      "loss": 0.2286,
      "step": 4848
    },
    {
      "epoch": 1.7906203840472674,
      "grad_norm": 0.3137156069278717,
      "learning_rate": 8.067495997043971e-05,
      "loss": 0.2646,
      "step": 4849
    },
    {
      "epoch": 1.790989660265879,
      "grad_norm": 0.300356388092041,
      "learning_rate": 8.065032639487622e-05,
      "loss": 0.2334,
      "step": 4850
    },
    {
      "epoch": 1.790989660265879,
      "eval_loss": 0.2527002692222595,
      "eval_runtime": 5.8733,
      "eval_samples_per_second": 8.513,
      "eval_steps_per_second": 1.192,
      "step": 4850
    },
    {
      "epoch": 1.7913589364844904,
      "grad_norm": 0.2975861430168152,
      "learning_rate": 8.062569281931272e-05,
      "loss": 0.2254,
      "step": 4851
    },
    {
      "epoch": 1.7917282127031018,
      "grad_norm": 0.2494833618402481,
      "learning_rate": 8.060105924374924e-05,
      "loss": 0.2048,
      "step": 4852
    },
    {
      "epoch": 1.7920974889217134,
      "grad_norm": 0.31400489807128906,
      "learning_rate": 8.057642566818574e-05,
      "loss": 0.1961,
      "step": 4853
    },
    {
      "epoch": 1.792466765140325,
      "grad_norm": 0.2428143173456192,
      "learning_rate": 8.055179209262224e-05,
      "loss": 0.2085,
      "step": 4854
    },
    {
      "epoch": 1.7928360413589366,
      "grad_norm": 0.28034815192222595,
      "learning_rate": 8.052715851705876e-05,
      "loss": 0.2512,
      "step": 4855
    },
    {
      "epoch": 1.793205317577548,
      "grad_norm": 0.2713885009288788,
      "learning_rate": 8.050252494149526e-05,
      "loss": 0.2438,
      "step": 4856
    },
    {
      "epoch": 1.7935745937961596,
      "grad_norm": 0.2918667793273926,
      "learning_rate": 8.047789136593177e-05,
      "loss": 0.2473,
      "step": 4857
    },
    {
      "epoch": 1.793943870014771,
      "grad_norm": 0.33813074231147766,
      "learning_rate": 8.045325779036827e-05,
      "loss": 0.2621,
      "step": 4858
    },
    {
      "epoch": 1.7943131462333826,
      "grad_norm": 0.29765844345092773,
      "learning_rate": 8.042862421480479e-05,
      "loss": 0.2367,
      "step": 4859
    },
    {
      "epoch": 1.7946824224519942,
      "grad_norm": 0.2806040048599243,
      "learning_rate": 8.040399063924129e-05,
      "loss": 0.2404,
      "step": 4860
    },
    {
      "epoch": 1.7950516986706058,
      "grad_norm": 0.32674238085746765,
      "learning_rate": 8.037935706367779e-05,
      "loss": 0.2396,
      "step": 4861
    },
    {
      "epoch": 1.7954209748892171,
      "grad_norm": 0.28877851366996765,
      "learning_rate": 8.03547234881143e-05,
      "loss": 0.2611,
      "step": 4862
    },
    {
      "epoch": 1.7957902511078285,
      "grad_norm": 0.2757430076599121,
      "learning_rate": 8.03300899125508e-05,
      "loss": 0.1673,
      "step": 4863
    },
    {
      "epoch": 1.7961595273264401,
      "grad_norm": 0.27084866166114807,
      "learning_rate": 8.030545633698732e-05,
      "loss": 0.2196,
      "step": 4864
    },
    {
      "epoch": 1.7965288035450517,
      "grad_norm": 0.2568441927433014,
      "learning_rate": 8.028082276142382e-05,
      "loss": 0.2232,
      "step": 4865
    },
    {
      "epoch": 1.7968980797636633,
      "grad_norm": 0.27764129638671875,
      "learning_rate": 8.025618918586034e-05,
      "loss": 0.264,
      "step": 4866
    },
    {
      "epoch": 1.7972673559822747,
      "grad_norm": 0.2663974463939667,
      "learning_rate": 8.023155561029684e-05,
      "loss": 0.2321,
      "step": 4867
    },
    {
      "epoch": 1.797636632200886,
      "grad_norm": 0.3205213248729706,
      "learning_rate": 8.020692203473335e-05,
      "loss": 0.2311,
      "step": 4868
    },
    {
      "epoch": 1.7980059084194977,
      "grad_norm": 0.2799781858921051,
      "learning_rate": 8.018228845916985e-05,
      "loss": 0.2061,
      "step": 4869
    },
    {
      "epoch": 1.7983751846381093,
      "grad_norm": 0.27617910504341125,
      "learning_rate": 8.015765488360635e-05,
      "loss": 0.2078,
      "step": 4870
    },
    {
      "epoch": 1.798744460856721,
      "grad_norm": 0.2966643273830414,
      "learning_rate": 8.013302130804287e-05,
      "loss": 0.1973,
      "step": 4871
    },
    {
      "epoch": 1.7991137370753325,
      "grad_norm": 0.2345452457666397,
      "learning_rate": 8.010838773247937e-05,
      "loss": 0.2251,
      "step": 4872
    },
    {
      "epoch": 1.799483013293944,
      "grad_norm": 0.2593734562397003,
      "learning_rate": 8.008375415691588e-05,
      "loss": 0.2127,
      "step": 4873
    },
    {
      "epoch": 1.7998522895125553,
      "grad_norm": 0.3477763533592224,
      "learning_rate": 8.005912058135238e-05,
      "loss": 0.2617,
      "step": 4874
    },
    {
      "epoch": 1.8002215657311669,
      "grad_norm": 0.29197418689727783,
      "learning_rate": 8.00344870057889e-05,
      "loss": 0.2382,
      "step": 4875
    },
    {
      "epoch": 1.8005908419497785,
      "grad_norm": 0.26208192110061646,
      "learning_rate": 8.00098534302254e-05,
      "loss": 0.2264,
      "step": 4876
    },
    {
      "epoch": 1.80096011816839,
      "grad_norm": 0.30810004472732544,
      "learning_rate": 7.99852198546619e-05,
      "loss": 0.2339,
      "step": 4877
    },
    {
      "epoch": 1.8013293943870015,
      "grad_norm": 0.31405359506607056,
      "learning_rate": 7.996058627909842e-05,
      "loss": 0.2072,
      "step": 4878
    },
    {
      "epoch": 1.8016986706056128,
      "grad_norm": 0.22917915880680084,
      "learning_rate": 7.993595270353492e-05,
      "loss": 0.2053,
      "step": 4879
    },
    {
      "epoch": 1.8020679468242244,
      "grad_norm": 0.23804986476898193,
      "learning_rate": 7.991131912797143e-05,
      "loss": 0.1675,
      "step": 4880
    },
    {
      "epoch": 1.802437223042836,
      "grad_norm": 0.24717144668102264,
      "learning_rate": 7.988668555240793e-05,
      "loss": 0.21,
      "step": 4881
    },
    {
      "epoch": 1.8028064992614476,
      "grad_norm": 0.2823777198791504,
      "learning_rate": 7.986205197684445e-05,
      "loss": 0.2119,
      "step": 4882
    },
    {
      "epoch": 1.803175775480059,
      "grad_norm": 0.27875393629074097,
      "learning_rate": 7.983741840128095e-05,
      "loss": 0.2538,
      "step": 4883
    },
    {
      "epoch": 1.8035450516986706,
      "grad_norm": 0.2991819381713867,
      "learning_rate": 7.981278482571746e-05,
      "loss": 0.2463,
      "step": 4884
    },
    {
      "epoch": 1.803914327917282,
      "grad_norm": 0.28031617403030396,
      "learning_rate": 7.978815125015396e-05,
      "loss": 0.2174,
      "step": 4885
    },
    {
      "epoch": 1.8042836041358936,
      "grad_norm": 0.2618853747844696,
      "learning_rate": 7.976351767459047e-05,
      "loss": 0.2306,
      "step": 4886
    },
    {
      "epoch": 1.8046528803545052,
      "grad_norm": 0.27993547916412354,
      "learning_rate": 7.973888409902698e-05,
      "loss": 0.2692,
      "step": 4887
    },
    {
      "epoch": 1.8050221565731168,
      "grad_norm": 0.29700198769569397,
      "learning_rate": 7.971425052346348e-05,
      "loss": 0.2188,
      "step": 4888
    },
    {
      "epoch": 1.8053914327917282,
      "grad_norm": 0.26962313055992126,
      "learning_rate": 7.96896169479e-05,
      "loss": 0.1874,
      "step": 4889
    },
    {
      "epoch": 1.8057607090103396,
      "grad_norm": 0.2779366672039032,
      "learning_rate": 7.96649833723365e-05,
      "loss": 0.212,
      "step": 4890
    },
    {
      "epoch": 1.8061299852289512,
      "grad_norm": 0.3361537754535675,
      "learning_rate": 7.964034979677301e-05,
      "loss": 0.2599,
      "step": 4891
    },
    {
      "epoch": 1.8064992614475628,
      "grad_norm": 0.2609734833240509,
      "learning_rate": 7.961571622120951e-05,
      "loss": 0.202,
      "step": 4892
    },
    {
      "epoch": 1.8068685376661744,
      "grad_norm": 0.28654301166534424,
      "learning_rate": 7.959108264564601e-05,
      "loss": 0.2053,
      "step": 4893
    },
    {
      "epoch": 1.8072378138847858,
      "grad_norm": 0.2660316526889801,
      "learning_rate": 7.956644907008253e-05,
      "loss": 0.2237,
      "step": 4894
    },
    {
      "epoch": 1.8076070901033974,
      "grad_norm": 0.28078603744506836,
      "learning_rate": 7.954181549451903e-05,
      "loss": 0.2098,
      "step": 4895
    },
    {
      "epoch": 1.8079763663220088,
      "grad_norm": 0.30883359909057617,
      "learning_rate": 7.951718191895554e-05,
      "loss": 0.2774,
      "step": 4896
    },
    {
      "epoch": 1.8083456425406204,
      "grad_norm": 0.2667382061481476,
      "learning_rate": 7.949254834339205e-05,
      "loss": 0.2029,
      "step": 4897
    },
    {
      "epoch": 1.808714918759232,
      "grad_norm": 0.24940231442451477,
      "learning_rate": 7.946791476782856e-05,
      "loss": 0.207,
      "step": 4898
    },
    {
      "epoch": 1.8090841949778436,
      "grad_norm": 0.2791793942451477,
      "learning_rate": 7.944328119226506e-05,
      "loss": 0.2278,
      "step": 4899
    },
    {
      "epoch": 1.809453471196455,
      "grad_norm": 0.30021530389785767,
      "learning_rate": 7.941864761670158e-05,
      "loss": 0.2268,
      "step": 4900
    },
    {
      "epoch": 1.809453471196455,
      "eval_loss": 0.25379928946495056,
      "eval_runtime": 5.8681,
      "eval_samples_per_second": 8.521,
      "eval_steps_per_second": 1.193,
      "step": 4900
    },
    {
      "epoch": 1.8098227474150663,
      "grad_norm": 0.27923181653022766,
      "learning_rate": 7.939401404113808e-05,
      "loss": 0.2709,
      "step": 4901
    },
    {
      "epoch": 1.810192023633678,
      "grad_norm": 0.26607418060302734,
      "learning_rate": 7.936938046557458e-05,
      "loss": 0.2299,
      "step": 4902
    },
    {
      "epoch": 1.8105612998522895,
      "grad_norm": 0.28474894165992737,
      "learning_rate": 7.934474689001109e-05,
      "loss": 0.2338,
      "step": 4903
    },
    {
      "epoch": 1.8109305760709011,
      "grad_norm": 0.3211461901664734,
      "learning_rate": 7.93201133144476e-05,
      "loss": 0.2629,
      "step": 4904
    },
    {
      "epoch": 1.8112998522895125,
      "grad_norm": 0.3524608612060547,
      "learning_rate": 7.929547973888411e-05,
      "loss": 0.2475,
      "step": 4905
    },
    {
      "epoch": 1.8116691285081241,
      "grad_norm": 0.3104952871799469,
      "learning_rate": 7.927084616332061e-05,
      "loss": 0.2325,
      "step": 4906
    },
    {
      "epoch": 1.8120384047267355,
      "grad_norm": 0.2545289099216461,
      "learning_rate": 7.924621258775712e-05,
      "loss": 0.234,
      "step": 4907
    },
    {
      "epoch": 1.812407680945347,
      "grad_norm": 0.37227439880371094,
      "learning_rate": 7.922157901219363e-05,
      "loss": 0.319,
      "step": 4908
    },
    {
      "epoch": 1.8127769571639587,
      "grad_norm": 0.23678454756736755,
      "learning_rate": 7.919694543663013e-05,
      "loss": 0.2029,
      "step": 4909
    },
    {
      "epoch": 1.8131462333825703,
      "grad_norm": 0.29453420639038086,
      "learning_rate": 7.917231186106664e-05,
      "loss": 0.2516,
      "step": 4910
    },
    {
      "epoch": 1.8135155096011817,
      "grad_norm": 0.298469215631485,
      "learning_rate": 7.914767828550314e-05,
      "loss": 0.2024,
      "step": 4911
    },
    {
      "epoch": 1.813884785819793,
      "grad_norm": 0.29158827662467957,
      "learning_rate": 7.912304470993966e-05,
      "loss": 0.2493,
      "step": 4912
    },
    {
      "epoch": 1.8142540620384047,
      "grad_norm": 0.36862993240356445,
      "learning_rate": 7.909841113437616e-05,
      "loss": 0.2848,
      "step": 4913
    },
    {
      "epoch": 1.8146233382570163,
      "grad_norm": 0.3295852541923523,
      "learning_rate": 7.907377755881267e-05,
      "loss": 0.2472,
      "step": 4914
    },
    {
      "epoch": 1.8149926144756279,
      "grad_norm": 0.2569772005081177,
      "learning_rate": 7.904914398324917e-05,
      "loss": 0.2247,
      "step": 4915
    },
    {
      "epoch": 1.8153618906942393,
      "grad_norm": 0.26352742314338684,
      "learning_rate": 7.902451040768567e-05,
      "loss": 0.2043,
      "step": 4916
    },
    {
      "epoch": 1.8157311669128509,
      "grad_norm": 0.2315554916858673,
      "learning_rate": 7.899987683212219e-05,
      "loss": 0.2121,
      "step": 4917
    },
    {
      "epoch": 1.8161004431314622,
      "grad_norm": 0.23188723623752594,
      "learning_rate": 7.897524325655869e-05,
      "loss": 0.1938,
      "step": 4918
    },
    {
      "epoch": 1.8164697193500738,
      "grad_norm": 0.2778877317905426,
      "learning_rate": 7.89506096809952e-05,
      "loss": 0.2,
      "step": 4919
    },
    {
      "epoch": 1.8168389955686854,
      "grad_norm": 0.2500912547111511,
      "learning_rate": 7.89259761054317e-05,
      "loss": 0.201,
      "step": 4920
    },
    {
      "epoch": 1.817208271787297,
      "grad_norm": 0.27916452288627625,
      "learning_rate": 7.890134252986822e-05,
      "loss": 0.2155,
      "step": 4921
    },
    {
      "epoch": 1.8175775480059084,
      "grad_norm": 0.24194051325321198,
      "learning_rate": 7.887670895430472e-05,
      "loss": 0.212,
      "step": 4922
    },
    {
      "epoch": 1.8179468242245198,
      "grad_norm": 0.2625460922718048,
      "learning_rate": 7.885207537874124e-05,
      "loss": 0.215,
      "step": 4923
    },
    {
      "epoch": 1.8183161004431314,
      "grad_norm": 0.30083608627319336,
      "learning_rate": 7.882744180317774e-05,
      "loss": 0.2235,
      "step": 4924
    },
    {
      "epoch": 1.818685376661743,
      "grad_norm": 0.24189461767673492,
      "learning_rate": 7.880280822761424e-05,
      "loss": 0.2138,
      "step": 4925
    },
    {
      "epoch": 1.8190546528803546,
      "grad_norm": 0.3320610821247101,
      "learning_rate": 7.877817465205075e-05,
      "loss": 0.256,
      "step": 4926
    },
    {
      "epoch": 1.819423929098966,
      "grad_norm": 0.31430310010910034,
      "learning_rate": 7.875354107648725e-05,
      "loss": 0.2411,
      "step": 4927
    },
    {
      "epoch": 1.8197932053175776,
      "grad_norm": 0.27846720814704895,
      "learning_rate": 7.872890750092377e-05,
      "loss": 0.2235,
      "step": 4928
    },
    {
      "epoch": 1.820162481536189,
      "grad_norm": 0.3182385265827179,
      "learning_rate": 7.870427392536027e-05,
      "loss": 0.253,
      "step": 4929
    },
    {
      "epoch": 1.8205317577548006,
      "grad_norm": 0.24252618849277496,
      "learning_rate": 7.867964034979678e-05,
      "loss": 0.2231,
      "step": 4930
    },
    {
      "epoch": 1.8209010339734122,
      "grad_norm": 0.2974163293838501,
      "learning_rate": 7.865500677423329e-05,
      "loss": 0.2217,
      "step": 4931
    },
    {
      "epoch": 1.8212703101920238,
      "grad_norm": 0.3330936133861542,
      "learning_rate": 7.863037319866979e-05,
      "loss": 0.2496,
      "step": 4932
    },
    {
      "epoch": 1.8216395864106352,
      "grad_norm": 0.23207621276378632,
      "learning_rate": 7.86057396231063e-05,
      "loss": 0.2205,
      "step": 4933
    },
    {
      "epoch": 1.8220088626292466,
      "grad_norm": 0.29393038153648376,
      "learning_rate": 7.85811060475428e-05,
      "loss": 0.2608,
      "step": 4934
    },
    {
      "epoch": 1.8223781388478582,
      "grad_norm": 0.32417479157447815,
      "learning_rate": 7.855647247197932e-05,
      "loss": 0.2421,
      "step": 4935
    },
    {
      "epoch": 1.8227474150664698,
      "grad_norm": 0.3282499313354492,
      "learning_rate": 7.853183889641582e-05,
      "loss": 0.2752,
      "step": 4936
    },
    {
      "epoch": 1.8231166912850814,
      "grad_norm": 0.2969152331352234,
      "learning_rate": 7.850720532085233e-05,
      "loss": 0.2614,
      "step": 4937
    },
    {
      "epoch": 1.8234859675036927,
      "grad_norm": 0.2571909427642822,
      "learning_rate": 7.848257174528883e-05,
      "loss": 0.1975,
      "step": 4938
    },
    {
      "epoch": 1.8238552437223041,
      "grad_norm": 0.3285469710826874,
      "learning_rate": 7.845793816972535e-05,
      "loss": 0.2396,
      "step": 4939
    },
    {
      "epoch": 1.8242245199409157,
      "grad_norm": 0.2782268524169922,
      "learning_rate": 7.843330459416185e-05,
      "loss": 0.1893,
      "step": 4940
    },
    {
      "epoch": 1.8245937961595273,
      "grad_norm": 0.25318440794944763,
      "learning_rate": 7.840867101859835e-05,
      "loss": 0.1934,
      "step": 4941
    },
    {
      "epoch": 1.824963072378139,
      "grad_norm": 0.2724683880805969,
      "learning_rate": 7.838403744303487e-05,
      "loss": 0.2136,
      "step": 4942
    },
    {
      "epoch": 1.8253323485967505,
      "grad_norm": 0.23463191092014313,
      "learning_rate": 7.835940386747137e-05,
      "loss": 0.1947,
      "step": 4943
    },
    {
      "epoch": 1.825701624815362,
      "grad_norm": 0.2644791007041931,
      "learning_rate": 7.833477029190788e-05,
      "loss": 0.2036,
      "step": 4944
    },
    {
      "epoch": 1.8260709010339733,
      "grad_norm": 0.32884877920150757,
      "learning_rate": 7.831013671634438e-05,
      "loss": 0.256,
      "step": 4945
    },
    {
      "epoch": 1.826440177252585,
      "grad_norm": 0.27714481949806213,
      "learning_rate": 7.82855031407809e-05,
      "loss": 0.22,
      "step": 4946
    },
    {
      "epoch": 1.8268094534711965,
      "grad_norm": 0.3246936798095703,
      "learning_rate": 7.82608695652174e-05,
      "loss": 0.2527,
      "step": 4947
    },
    {
      "epoch": 1.827178729689808,
      "grad_norm": 0.23440565168857574,
      "learning_rate": 7.82362359896539e-05,
      "loss": 0.1986,
      "step": 4948
    },
    {
      "epoch": 1.8275480059084195,
      "grad_norm": 0.22908934950828552,
      "learning_rate": 7.821160241409041e-05,
      "loss": 0.1855,
      "step": 4949
    },
    {
      "epoch": 1.8279172821270309,
      "grad_norm": 0.33465614914894104,
      "learning_rate": 7.818696883852691e-05,
      "loss": 0.2077,
      "step": 4950
    },
    {
      "epoch": 1.8279172821270309,
      "eval_loss": 0.2532367408275604,
      "eval_runtime": 5.8662,
      "eval_samples_per_second": 8.523,
      "eval_steps_per_second": 1.193,
      "step": 4950
    },
    {
      "epoch": 1.8282865583456425,
      "grad_norm": 0.22871074080467224,
      "learning_rate": 7.816233526296343e-05,
      "loss": 0.1997,
      "step": 4951
    },
    {
      "epoch": 1.828655834564254,
      "grad_norm": 0.28895342350006104,
      "learning_rate": 7.813770168739993e-05,
      "loss": 0.2432,
      "step": 4952
    },
    {
      "epoch": 1.8290251107828657,
      "grad_norm": 0.30292877554893494,
      "learning_rate": 7.811306811183645e-05,
      "loss": 0.2621,
      "step": 4953
    },
    {
      "epoch": 1.829394387001477,
      "grad_norm": 0.26060134172439575,
      "learning_rate": 7.808843453627295e-05,
      "loss": 0.1941,
      "step": 4954
    },
    {
      "epoch": 1.8297636632200887,
      "grad_norm": 0.29909786581993103,
      "learning_rate": 7.806380096070946e-05,
      "loss": 0.233,
      "step": 4955
    },
    {
      "epoch": 1.8301329394387,
      "grad_norm": 0.29001250863075256,
      "learning_rate": 7.803916738514596e-05,
      "loss": 0.2142,
      "step": 4956
    },
    {
      "epoch": 1.8305022156573116,
      "grad_norm": 0.30807846784591675,
      "learning_rate": 7.801453380958246e-05,
      "loss": 0.2308,
      "step": 4957
    },
    {
      "epoch": 1.8308714918759232,
      "grad_norm": 0.26788103580474854,
      "learning_rate": 7.798990023401898e-05,
      "loss": 0.2613,
      "step": 4958
    },
    {
      "epoch": 1.8312407680945348,
      "grad_norm": 0.24326567351818085,
      "learning_rate": 7.796526665845548e-05,
      "loss": 0.2056,
      "step": 4959
    },
    {
      "epoch": 1.8316100443131462,
      "grad_norm": 0.21416209638118744,
      "learning_rate": 7.7940633082892e-05,
      "loss": 0.2035,
      "step": 4960
    },
    {
      "epoch": 1.8319793205317576,
      "grad_norm": 0.28870993852615356,
      "learning_rate": 7.79159995073285e-05,
      "loss": 0.2139,
      "step": 4961
    },
    {
      "epoch": 1.8323485967503692,
      "grad_norm": 0.27659568190574646,
      "learning_rate": 7.789136593176501e-05,
      "loss": 0.2399,
      "step": 4962
    },
    {
      "epoch": 1.8327178729689808,
      "grad_norm": 0.2337316870689392,
      "learning_rate": 7.786673235620151e-05,
      "loss": 0.2157,
      "step": 4963
    },
    {
      "epoch": 1.8330871491875924,
      "grad_norm": 0.2649693489074707,
      "learning_rate": 7.784209878063801e-05,
      "loss": 0.2153,
      "step": 4964
    },
    {
      "epoch": 1.8334564254062038,
      "grad_norm": 0.3539893925189972,
      "learning_rate": 7.781746520507453e-05,
      "loss": 0.2891,
      "step": 4965
    },
    {
      "epoch": 1.8338257016248154,
      "grad_norm": 0.2172566056251526,
      "learning_rate": 7.779283162951103e-05,
      "loss": 0.1724,
      "step": 4966
    },
    {
      "epoch": 1.8341949778434268,
      "grad_norm": 0.24495668709278107,
      "learning_rate": 7.776819805394754e-05,
      "loss": 0.2153,
      "step": 4967
    },
    {
      "epoch": 1.8345642540620384,
      "grad_norm": 0.2146148979663849,
      "learning_rate": 7.774356447838404e-05,
      "loss": 0.1984,
      "step": 4968
    },
    {
      "epoch": 1.83493353028065,
      "grad_norm": 0.2669045627117157,
      "learning_rate": 7.771893090282056e-05,
      "loss": 0.2334,
      "step": 4969
    },
    {
      "epoch": 1.8353028064992616,
      "grad_norm": 0.2946428656578064,
      "learning_rate": 7.769429732725706e-05,
      "loss": 0.2593,
      "step": 4970
    },
    {
      "epoch": 1.835672082717873,
      "grad_norm": 0.21473877131938934,
      "learning_rate": 7.766966375169357e-05,
      "loss": 0.159,
      "step": 4971
    },
    {
      "epoch": 1.8360413589364843,
      "grad_norm": 0.2652877867221832,
      "learning_rate": 7.764503017613007e-05,
      "loss": 0.2641,
      "step": 4972
    },
    {
      "epoch": 1.836410635155096,
      "grad_norm": 0.30034440755844116,
      "learning_rate": 7.762039660056658e-05,
      "loss": 0.2464,
      "step": 4973
    },
    {
      "epoch": 1.8367799113737076,
      "grad_norm": 0.2821067273616791,
      "learning_rate": 7.759576302500309e-05,
      "loss": 0.2093,
      "step": 4974
    },
    {
      "epoch": 1.8371491875923192,
      "grad_norm": 0.2559252381324768,
      "learning_rate": 7.757112944943959e-05,
      "loss": 0.197,
      "step": 4975
    },
    {
      "epoch": 1.8375184638109305,
      "grad_norm": 0.2899158000946045,
      "learning_rate": 7.75464958738761e-05,
      "loss": 0.2441,
      "step": 4976
    },
    {
      "epoch": 1.8378877400295421,
      "grad_norm": 0.2630823254585266,
      "learning_rate": 7.752186229831261e-05,
      "loss": 0.2376,
      "step": 4977
    },
    {
      "epoch": 1.8382570162481535,
      "grad_norm": 0.25868675112724304,
      "learning_rate": 7.749722872274912e-05,
      "loss": 0.2282,
      "step": 4978
    },
    {
      "epoch": 1.8386262924667651,
      "grad_norm": 0.30225345492362976,
      "learning_rate": 7.747259514718562e-05,
      "loss": 0.2634,
      "step": 4979
    },
    {
      "epoch": 1.8389955686853767,
      "grad_norm": 0.3172706067562103,
      "learning_rate": 7.744796157162212e-05,
      "loss": 0.221,
      "step": 4980
    },
    {
      "epoch": 1.8393648449039883,
      "grad_norm": 0.2501530945301056,
      "learning_rate": 7.742332799605864e-05,
      "loss": 0.21,
      "step": 4981
    },
    {
      "epoch": 1.8397341211225997,
      "grad_norm": 0.2722257077693939,
      "learning_rate": 7.739869442049514e-05,
      "loss": 0.241,
      "step": 4982
    },
    {
      "epoch": 1.840103397341211,
      "grad_norm": 0.25727182626724243,
      "learning_rate": 7.737406084493165e-05,
      "loss": 0.223,
      "step": 4983
    },
    {
      "epoch": 1.8404726735598227,
      "grad_norm": 0.2605494260787964,
      "learning_rate": 7.734942726936816e-05,
      "loss": 0.1922,
      "step": 4984
    },
    {
      "epoch": 1.8408419497784343,
      "grad_norm": 0.2520425617694855,
      "learning_rate": 7.732479369380467e-05,
      "loss": 0.2214,
      "step": 4985
    },
    {
      "epoch": 1.841211225997046,
      "grad_norm": 0.2699052393436432,
      "learning_rate": 7.730016011824117e-05,
      "loss": 0.2277,
      "step": 4986
    },
    {
      "epoch": 1.8415805022156573,
      "grad_norm": 0.24361835420131683,
      "learning_rate": 7.727552654267769e-05,
      "loss": 0.2008,
      "step": 4987
    },
    {
      "epoch": 1.8419497784342689,
      "grad_norm": 0.2995036244392395,
      "learning_rate": 7.725089296711419e-05,
      "loss": 0.2413,
      "step": 4988
    },
    {
      "epoch": 1.8423190546528803,
      "grad_norm": 0.31261393427848816,
      "learning_rate": 7.722625939155069e-05,
      "loss": 0.2105,
      "step": 4989
    },
    {
      "epoch": 1.8426883308714919,
      "grad_norm": 0.3322518467903137,
      "learning_rate": 7.72016258159872e-05,
      "loss": 0.2204,
      "step": 4990
    },
    {
      "epoch": 1.8430576070901035,
      "grad_norm": 0.24115926027297974,
      "learning_rate": 7.71769922404237e-05,
      "loss": 0.2078,
      "step": 4991
    },
    {
      "epoch": 1.843426883308715,
      "grad_norm": 0.2732994258403778,
      "learning_rate": 7.715235866486022e-05,
      "loss": 0.2078,
      "step": 4992
    },
    {
      "epoch": 1.8437961595273265,
      "grad_norm": 0.26314637064933777,
      "learning_rate": 7.71277250892967e-05,
      "loss": 0.2212,
      "step": 4993
    },
    {
      "epoch": 1.8441654357459378,
      "grad_norm": 0.2788105010986328,
      "learning_rate": 7.710309151373322e-05,
      "loss": 0.2082,
      "step": 4994
    },
    {
      "epoch": 1.8445347119645494,
      "grad_norm": 0.2987094521522522,
      "learning_rate": 7.707845793816972e-05,
      "loss": 0.2213,
      "step": 4995
    },
    {
      "epoch": 1.844903988183161,
      "grad_norm": 0.29209956526756287,
      "learning_rate": 7.705382436260624e-05,
      "loss": 0.2482,
      "step": 4996
    },
    {
      "epoch": 1.8452732644017726,
      "grad_norm": 0.33971697092056274,
      "learning_rate": 7.702919078704274e-05,
      "loss": 0.2339,
      "step": 4997
    },
    {
      "epoch": 1.845642540620384,
      "grad_norm": 0.27986255288124084,
      "learning_rate": 7.700455721147924e-05,
      "loss": 0.2744,
      "step": 4998
    },
    {
      "epoch": 1.8460118168389956,
      "grad_norm": 0.2867780923843384,
      "learning_rate": 7.697992363591575e-05,
      "loss": 0.2315,
      "step": 4999
    },
    {
      "epoch": 1.846381093057607,
      "grad_norm": 0.26802054047584534,
      "learning_rate": 7.695529006035225e-05,
      "loss": 0.1738,
      "step": 5000
    },
    {
      "epoch": 1.846381093057607,
      "eval_loss": 0.25385627150535583,
      "eval_runtime": 5.861,
      "eval_samples_per_second": 8.531,
      "eval_steps_per_second": 1.194,
      "step": 5000
    },
    {
      "epoch": 1.8467503692762186,
      "grad_norm": 0.2724579870700836,
      "learning_rate": 7.693065648478877e-05,
      "loss": 0.2288,
      "step": 5001
    },
    {
      "epoch": 1.8471196454948302,
      "grad_norm": 0.2348249852657318,
      "learning_rate": 7.690602290922527e-05,
      "loss": 0.1983,
      "step": 5002
    },
    {
      "epoch": 1.8474889217134418,
      "grad_norm": 0.25820598006248474,
      "learning_rate": 7.688138933366178e-05,
      "loss": 0.2524,
      "step": 5003
    },
    {
      "epoch": 1.8478581979320532,
      "grad_norm": 0.27874141931533813,
      "learning_rate": 7.685675575809829e-05,
      "loss": 0.235,
      "step": 5004
    },
    {
      "epoch": 1.8482274741506646,
      "grad_norm": 0.24114708602428436,
      "learning_rate": 7.68321221825348e-05,
      "loss": 0.1999,
      "step": 5005
    },
    {
      "epoch": 1.8485967503692762,
      "grad_norm": 0.282846599817276,
      "learning_rate": 7.68074886069713e-05,
      "loss": 0.2693,
      "step": 5006
    },
    {
      "epoch": 1.8489660265878878,
      "grad_norm": 0.2789778709411621,
      "learning_rate": 7.67828550314078e-05,
      "loss": 0.2348,
      "step": 5007
    },
    {
      "epoch": 1.8493353028064994,
      "grad_norm": 0.30808788537979126,
      "learning_rate": 7.675822145584432e-05,
      "loss": 0.2801,
      "step": 5008
    },
    {
      "epoch": 1.8497045790251108,
      "grad_norm": 0.2590267062187195,
      "learning_rate": 7.673358788028082e-05,
      "loss": 0.2391,
      "step": 5009
    },
    {
      "epoch": 1.8500738552437221,
      "grad_norm": 0.3276561200618744,
      "learning_rate": 7.670895430471733e-05,
      "loss": 0.2578,
      "step": 5010
    },
    {
      "epoch": 1.8504431314623337,
      "grad_norm": 0.4562641680240631,
      "learning_rate": 7.668432072915383e-05,
      "loss": 0.2748,
      "step": 5011
    },
    {
      "epoch": 1.8508124076809453,
      "grad_norm": 0.24998047947883606,
      "learning_rate": 7.665968715359035e-05,
      "loss": 0.2161,
      "step": 5012
    },
    {
      "epoch": 1.851181683899557,
      "grad_norm": 0.2959882915019989,
      "learning_rate": 7.663505357802685e-05,
      "loss": 0.2769,
      "step": 5013
    },
    {
      "epoch": 1.8515509601181686,
      "grad_norm": 0.2754557728767395,
      "learning_rate": 7.661042000246335e-05,
      "loss": 0.2096,
      "step": 5014
    },
    {
      "epoch": 1.85192023633678,
      "grad_norm": 0.2741212844848633,
      "learning_rate": 7.658578642689986e-05,
      "loss": 0.2303,
      "step": 5015
    },
    {
      "epoch": 1.8522895125553913,
      "grad_norm": 0.2592034935951233,
      "learning_rate": 7.656115285133637e-05,
      "loss": 0.2072,
      "step": 5016
    },
    {
      "epoch": 1.852658788774003,
      "grad_norm": 0.27041250467300415,
      "learning_rate": 7.653651927577288e-05,
      "loss": 0.2168,
      "step": 5017
    },
    {
      "epoch": 1.8530280649926145,
      "grad_norm": 0.2315387725830078,
      "learning_rate": 7.651188570020938e-05,
      "loss": 0.1993,
      "step": 5018
    },
    {
      "epoch": 1.8533973412112261,
      "grad_norm": 0.2641536295413971,
      "learning_rate": 7.64872521246459e-05,
      "loss": 0.2271,
      "step": 5019
    },
    {
      "epoch": 1.8537666174298375,
      "grad_norm": 0.28545939922332764,
      "learning_rate": 7.64626185490824e-05,
      "loss": 0.2229,
      "step": 5020
    },
    {
      "epoch": 1.8541358936484489,
      "grad_norm": 0.2999255657196045,
      "learning_rate": 7.643798497351891e-05,
      "loss": 0.245,
      "step": 5021
    },
    {
      "epoch": 1.8545051698670605,
      "grad_norm": 0.25720641016960144,
      "learning_rate": 7.641335139795541e-05,
      "loss": 0.212,
      "step": 5022
    },
    {
      "epoch": 1.854874446085672,
      "grad_norm": 0.2911759316921234,
      "learning_rate": 7.638871782239191e-05,
      "loss": 0.2396,
      "step": 5023
    },
    {
      "epoch": 1.8552437223042837,
      "grad_norm": 0.2456437200307846,
      "learning_rate": 7.636408424682843e-05,
      "loss": 0.2242,
      "step": 5024
    },
    {
      "epoch": 1.855612998522895,
      "grad_norm": 0.23431618511676788,
      "learning_rate": 7.633945067126493e-05,
      "loss": 0.2165,
      "step": 5025
    },
    {
      "epoch": 1.8559822747415067,
      "grad_norm": 0.28363487124443054,
      "learning_rate": 7.631481709570144e-05,
      "loss": 0.2407,
      "step": 5026
    },
    {
      "epoch": 1.856351550960118,
      "grad_norm": 0.2409060150384903,
      "learning_rate": 7.629018352013795e-05,
      "loss": 0.2017,
      "step": 5027
    },
    {
      "epoch": 1.8567208271787297,
      "grad_norm": 0.3640720546245575,
      "learning_rate": 7.626554994457446e-05,
      "loss": 0.2299,
      "step": 5028
    },
    {
      "epoch": 1.8570901033973413,
      "grad_norm": 0.2626873850822449,
      "learning_rate": 7.624091636901096e-05,
      "loss": 0.1962,
      "step": 5029
    },
    {
      "epoch": 1.8574593796159529,
      "grad_norm": 0.2808993458747864,
      "learning_rate": 7.621628279344746e-05,
      "loss": 0.224,
      "step": 5030
    },
    {
      "epoch": 1.8578286558345642,
      "grad_norm": 0.23122955858707428,
      "learning_rate": 7.619164921788398e-05,
      "loss": 0.1914,
      "step": 5031
    },
    {
      "epoch": 1.8581979320531756,
      "grad_norm": 0.2904467284679413,
      "learning_rate": 7.616701564232048e-05,
      "loss": 0.2252,
      "step": 5032
    },
    {
      "epoch": 1.8585672082717872,
      "grad_norm": 0.27927500009536743,
      "learning_rate": 7.614238206675699e-05,
      "loss": 0.211,
      "step": 5033
    },
    {
      "epoch": 1.8589364844903988,
      "grad_norm": 0.3011045455932617,
      "learning_rate": 7.61177484911935e-05,
      "loss": 0.235,
      "step": 5034
    },
    {
      "epoch": 1.8593057607090104,
      "grad_norm": 0.28089842200279236,
      "learning_rate": 7.609311491563001e-05,
      "loss": 0.2617,
      "step": 5035
    },
    {
      "epoch": 1.8596750369276218,
      "grad_norm": 0.2582519054412842,
      "learning_rate": 7.606848134006651e-05,
      "loss": 0.2072,
      "step": 5036
    },
    {
      "epoch": 1.8600443131462334,
      "grad_norm": 0.3025238513946533,
      "learning_rate": 7.604384776450302e-05,
      "loss": 0.2946,
      "step": 5037
    },
    {
      "epoch": 1.8604135893648448,
      "grad_norm": 0.276224821805954,
      "learning_rate": 7.601921418893953e-05,
      "loss": 0.2209,
      "step": 5038
    },
    {
      "epoch": 1.8607828655834564,
      "grad_norm": 0.23579522967338562,
      "learning_rate": 7.599458061337603e-05,
      "loss": 0.206,
      "step": 5039
    },
    {
      "epoch": 1.861152141802068,
      "grad_norm": 0.29115140438079834,
      "learning_rate": 7.596994703781254e-05,
      "loss": 0.2361,
      "step": 5040
    },
    {
      "epoch": 1.8615214180206796,
      "grad_norm": 0.2666078209877014,
      "learning_rate": 7.594531346224904e-05,
      "loss": 0.2266,
      "step": 5041
    },
    {
      "epoch": 1.861890694239291,
      "grad_norm": 0.2762908637523651,
      "learning_rate": 7.592067988668556e-05,
      "loss": 0.2058,
      "step": 5042
    },
    {
      "epoch": 1.8622599704579024,
      "grad_norm": 0.2928195297718048,
      "learning_rate": 7.589604631112206e-05,
      "loss": 0.219,
      "step": 5043
    },
    {
      "epoch": 1.862629246676514,
      "grad_norm": 0.31124547123908997,
      "learning_rate": 7.587141273555857e-05,
      "loss": 0.256,
      "step": 5044
    },
    {
      "epoch": 1.8629985228951256,
      "grad_norm": 0.2583990693092346,
      "learning_rate": 7.584677915999507e-05,
      "loss": 0.2116,
      "step": 5045
    },
    {
      "epoch": 1.8633677991137372,
      "grad_norm": 0.291933536529541,
      "learning_rate": 7.582214558443157e-05,
      "loss": 0.2408,
      "step": 5046
    },
    {
      "epoch": 1.8637370753323486,
      "grad_norm": 0.28724706172943115,
      "learning_rate": 7.579751200886809e-05,
      "loss": 0.2101,
      "step": 5047
    },
    {
      "epoch": 1.8641063515509602,
      "grad_norm": 0.23857556283473969,
      "learning_rate": 7.577287843330459e-05,
      "loss": 0.2056,
      "step": 5048
    },
    {
      "epoch": 1.8644756277695715,
      "grad_norm": 0.29885801672935486,
      "learning_rate": 7.57482448577411e-05,
      "loss": 0.2172,
      "step": 5049
    },
    {
      "epoch": 1.8648449039881831,
      "grad_norm": 0.3281831741333008,
      "learning_rate": 7.57236112821776e-05,
      "loss": 0.2321,
      "step": 5050
    },
    {
      "epoch": 1.8648449039881831,
      "eval_loss": 0.2508034110069275,
      "eval_runtime": 5.8639,
      "eval_samples_per_second": 8.527,
      "eval_steps_per_second": 1.194,
      "step": 5050
    },
    {
      "epoch": 1.8652141802067947,
      "grad_norm": 0.3108578026294708,
      "learning_rate": 7.569897770661412e-05,
      "loss": 0.2422,
      "step": 5051
    },
    {
      "epoch": 1.8655834564254064,
      "grad_norm": 0.3225805163383484,
      "learning_rate": 7.567434413105062e-05,
      "loss": 0.2449,
      "step": 5052
    },
    {
      "epoch": 1.8659527326440177,
      "grad_norm": 0.23075884580612183,
      "learning_rate": 7.564971055548714e-05,
      "loss": 0.2048,
      "step": 5053
    },
    {
      "epoch": 1.8663220088626291,
      "grad_norm": 0.2535512149333954,
      "learning_rate": 7.562507697992364e-05,
      "loss": 0.2184,
      "step": 5054
    },
    {
      "epoch": 1.8666912850812407,
      "grad_norm": 0.21854400634765625,
      "learning_rate": 7.560044340436014e-05,
      "loss": 0.1803,
      "step": 5055
    },
    {
      "epoch": 1.8670605612998523,
      "grad_norm": 0.30747005343437195,
      "learning_rate": 7.557580982879665e-05,
      "loss": 0.2439,
      "step": 5056
    },
    {
      "epoch": 1.867429837518464,
      "grad_norm": 0.25755876302719116,
      "learning_rate": 7.555117625323315e-05,
      "loss": 0.216,
      "step": 5057
    },
    {
      "epoch": 1.8677991137370753,
      "grad_norm": 0.2670411765575409,
      "learning_rate": 7.552654267766967e-05,
      "loss": 0.2101,
      "step": 5058
    },
    {
      "epoch": 1.868168389955687,
      "grad_norm": 0.23956617712974548,
      "learning_rate": 7.550190910210617e-05,
      "loss": 0.2226,
      "step": 5059
    },
    {
      "epoch": 1.8685376661742983,
      "grad_norm": 0.24762064218521118,
      "learning_rate": 7.547727552654268e-05,
      "loss": 0.2119,
      "step": 5060
    },
    {
      "epoch": 1.8689069423929099,
      "grad_norm": 0.23967112600803375,
      "learning_rate": 7.545264195097919e-05,
      "loss": 0.2027,
      "step": 5061
    },
    {
      "epoch": 1.8692762186115215,
      "grad_norm": 0.25305482745170593,
      "learning_rate": 7.542800837541569e-05,
      "loss": 0.2391,
      "step": 5062
    },
    {
      "epoch": 1.869645494830133,
      "grad_norm": 0.3014960289001465,
      "learning_rate": 7.54033747998522e-05,
      "loss": 0.226,
      "step": 5063
    },
    {
      "epoch": 1.8700147710487445,
      "grad_norm": 0.3208469748497009,
      "learning_rate": 7.53787412242887e-05,
      "loss": 0.2194,
      "step": 5064
    },
    {
      "epoch": 1.8703840472673559,
      "grad_norm": 0.3345724642276764,
      "learning_rate": 7.535410764872522e-05,
      "loss": 0.252,
      "step": 5065
    },
    {
      "epoch": 1.8707533234859675,
      "grad_norm": 0.2663443386554718,
      "learning_rate": 7.532947407316172e-05,
      "loss": 0.2241,
      "step": 5066
    },
    {
      "epoch": 1.871122599704579,
      "grad_norm": 0.27943527698516846,
      "learning_rate": 7.530484049759823e-05,
      "loss": 0.2229,
      "step": 5067
    },
    {
      "epoch": 1.8714918759231907,
      "grad_norm": 0.26502811908721924,
      "learning_rate": 7.528020692203473e-05,
      "loss": 0.2471,
      "step": 5068
    },
    {
      "epoch": 1.871861152141802,
      "grad_norm": 0.3713107705116272,
      "learning_rate": 7.525557334647124e-05,
      "loss": 0.2697,
      "step": 5069
    },
    {
      "epoch": 1.8722304283604134,
      "grad_norm": 0.2940278947353363,
      "learning_rate": 7.523093977090775e-05,
      "loss": 0.2426,
      "step": 5070
    },
    {
      "epoch": 1.872599704579025,
      "grad_norm": 0.27475041151046753,
      "learning_rate": 7.520630619534425e-05,
      "loss": 0.2441,
      "step": 5071
    },
    {
      "epoch": 1.8729689807976366,
      "grad_norm": 0.3016396462917328,
      "learning_rate": 7.518167261978077e-05,
      "loss": 0.2287,
      "step": 5072
    },
    {
      "epoch": 1.8733382570162482,
      "grad_norm": 0.2879699170589447,
      "learning_rate": 7.515703904421727e-05,
      "loss": 0.2825,
      "step": 5073
    },
    {
      "epoch": 1.8737075332348598,
      "grad_norm": 0.32314956188201904,
      "learning_rate": 7.513240546865378e-05,
      "loss": 0.2476,
      "step": 5074
    },
    {
      "epoch": 1.8740768094534712,
      "grad_norm": 0.3410947322845459,
      "learning_rate": 7.510777189309028e-05,
      "loss": 0.2736,
      "step": 5075
    },
    {
      "epoch": 1.8744460856720826,
      "grad_norm": 0.30659565329551697,
      "learning_rate": 7.50831383175268e-05,
      "loss": 0.2425,
      "step": 5076
    },
    {
      "epoch": 1.8748153618906942,
      "grad_norm": 0.2589021623134613,
      "learning_rate": 7.50585047419633e-05,
      "loss": 0.2123,
      "step": 5077
    },
    {
      "epoch": 1.8751846381093058,
      "grad_norm": 0.2562973201274872,
      "learning_rate": 7.50338711663998e-05,
      "loss": 0.215,
      "step": 5078
    },
    {
      "epoch": 1.8755539143279174,
      "grad_norm": 0.26437804102897644,
      "learning_rate": 7.500923759083631e-05,
      "loss": 0.2137,
      "step": 5079
    },
    {
      "epoch": 1.8759231905465288,
      "grad_norm": 0.3351561427116394,
      "learning_rate": 7.498460401527282e-05,
      "loss": 0.2973,
      "step": 5080
    },
    {
      "epoch": 1.8762924667651402,
      "grad_norm": 0.26624685525894165,
      "learning_rate": 7.495997043970933e-05,
      "loss": 0.2211,
      "step": 5081
    },
    {
      "epoch": 1.8766617429837518,
      "grad_norm": 0.3059654235839844,
      "learning_rate": 7.493533686414583e-05,
      "loss": 0.2444,
      "step": 5082
    },
    {
      "epoch": 1.8770310192023634,
      "grad_norm": 0.2658170461654663,
      "learning_rate": 7.491070328858235e-05,
      "loss": 0.1953,
      "step": 5083
    },
    {
      "epoch": 1.877400295420975,
      "grad_norm": 0.21191762387752533,
      "learning_rate": 7.488606971301885e-05,
      "loss": 0.1851,
      "step": 5084
    },
    {
      "epoch": 1.8777695716395866,
      "grad_norm": 0.29865381121635437,
      "learning_rate": 7.486143613745535e-05,
      "loss": 0.2357,
      "step": 5085
    },
    {
      "epoch": 1.878138847858198,
      "grad_norm": 0.26615944504737854,
      "learning_rate": 7.483680256189186e-05,
      "loss": 0.2252,
      "step": 5086
    },
    {
      "epoch": 1.8785081240768093,
      "grad_norm": 0.27517449855804443,
      "learning_rate": 7.481216898632836e-05,
      "loss": 0.1947,
      "step": 5087
    },
    {
      "epoch": 1.878877400295421,
      "grad_norm": 0.2770524024963379,
      "learning_rate": 7.478753541076488e-05,
      "loss": 0.2113,
      "step": 5088
    },
    {
      "epoch": 1.8792466765140325,
      "grad_norm": 0.31695282459259033,
      "learning_rate": 7.476290183520138e-05,
      "loss": 0.2874,
      "step": 5089
    },
    {
      "epoch": 1.8796159527326441,
      "grad_norm": 0.2557409405708313,
      "learning_rate": 7.47382682596379e-05,
      "loss": 0.2178,
      "step": 5090
    },
    {
      "epoch": 1.8799852289512555,
      "grad_norm": 0.2620340585708618,
      "learning_rate": 7.47136346840744e-05,
      "loss": 0.2342,
      "step": 5091
    },
    {
      "epoch": 1.880354505169867,
      "grad_norm": 0.31770604848861694,
      "learning_rate": 7.468900110851091e-05,
      "loss": 0.1944,
      "step": 5092
    },
    {
      "epoch": 1.8807237813884785,
      "grad_norm": 0.27841833233833313,
      "learning_rate": 7.466436753294741e-05,
      "loss": 0.2305,
      "step": 5093
    },
    {
      "epoch": 1.8810930576070901,
      "grad_norm": 0.3052809536457062,
      "learning_rate": 7.463973395738391e-05,
      "loss": 0.2707,
      "step": 5094
    },
    {
      "epoch": 1.8814623338257017,
      "grad_norm": 0.2491428405046463,
      "learning_rate": 7.461510038182043e-05,
      "loss": 0.2319,
      "step": 5095
    },
    {
      "epoch": 1.881831610044313,
      "grad_norm": 0.25979429483413696,
      "learning_rate": 7.459046680625693e-05,
      "loss": 0.2145,
      "step": 5096
    },
    {
      "epoch": 1.8822008862629247,
      "grad_norm": 0.25400927662849426,
      "learning_rate": 7.456583323069344e-05,
      "loss": 0.2167,
      "step": 5097
    },
    {
      "epoch": 1.882570162481536,
      "grad_norm": 0.31190866231918335,
      "learning_rate": 7.454119965512994e-05,
      "loss": 0.1903,
      "step": 5098
    },
    {
      "epoch": 1.8829394387001477,
      "grad_norm": 0.3358284831047058,
      "learning_rate": 7.451656607956646e-05,
      "loss": 0.2728,
      "step": 5099
    },
    {
      "epoch": 1.8833087149187593,
      "grad_norm": 0.2279479205608368,
      "learning_rate": 7.449193250400296e-05,
      "loss": 0.181,
      "step": 5100
    },
    {
      "epoch": 1.8833087149187593,
      "eval_loss": 0.25126925110816956,
      "eval_runtime": 5.8627,
      "eval_samples_per_second": 8.529,
      "eval_steps_per_second": 1.194,
      "step": 5100
    },
    {
      "epoch": 1.8836779911373709,
      "grad_norm": 0.27150535583496094,
      "learning_rate": 7.446729892843946e-05,
      "loss": 0.2157,
      "step": 5101
    },
    {
      "epoch": 1.8840472673559823,
      "grad_norm": 0.2573368549346924,
      "learning_rate": 7.444266535287597e-05,
      "loss": 0.2291,
      "step": 5102
    },
    {
      "epoch": 1.8844165435745936,
      "grad_norm": 0.28329354524612427,
      "learning_rate": 7.441803177731248e-05,
      "loss": 0.2202,
      "step": 5103
    },
    {
      "epoch": 1.8847858197932053,
      "grad_norm": 0.30760088562965393,
      "learning_rate": 7.439339820174899e-05,
      "loss": 0.2167,
      "step": 5104
    },
    {
      "epoch": 1.8851550960118169,
      "grad_norm": 0.30110180377960205,
      "learning_rate": 7.436876462618549e-05,
      "loss": 0.2096,
      "step": 5105
    },
    {
      "epoch": 1.8855243722304285,
      "grad_norm": 0.2775033116340637,
      "learning_rate": 7.4344131050622e-05,
      "loss": 0.2281,
      "step": 5106
    },
    {
      "epoch": 1.8858936484490398,
      "grad_norm": 0.2764638364315033,
      "learning_rate": 7.431949747505851e-05,
      "loss": 0.1961,
      "step": 5107
    },
    {
      "epoch": 1.8862629246676514,
      "grad_norm": 0.24575462937355042,
      "learning_rate": 7.429486389949502e-05,
      "loss": 0.1838,
      "step": 5108
    },
    {
      "epoch": 1.8866322008862628,
      "grad_norm": 0.3272894322872162,
      "learning_rate": 7.427023032393152e-05,
      "loss": 0.2512,
      "step": 5109
    },
    {
      "epoch": 1.8870014771048744,
      "grad_norm": 0.2593172788619995,
      "learning_rate": 7.424559674836802e-05,
      "loss": 0.2513,
      "step": 5110
    },
    {
      "epoch": 1.887370753323486,
      "grad_norm": 0.25487715005874634,
      "learning_rate": 7.422096317280454e-05,
      "loss": 0.209,
      "step": 5111
    },
    {
      "epoch": 1.8877400295420976,
      "grad_norm": 0.26173579692840576,
      "learning_rate": 7.419632959724104e-05,
      "loss": 0.2293,
      "step": 5112
    },
    {
      "epoch": 1.888109305760709,
      "grad_norm": 0.3187013864517212,
      "learning_rate": 7.417169602167755e-05,
      "loss": 0.2772,
      "step": 5113
    },
    {
      "epoch": 1.8884785819793204,
      "grad_norm": 0.3143283724784851,
      "learning_rate": 7.414706244611406e-05,
      "loss": 0.1915,
      "step": 5114
    },
    {
      "epoch": 1.888847858197932,
      "grad_norm": 0.30088192224502563,
      "learning_rate": 7.412242887055057e-05,
      "loss": 0.2472,
      "step": 5115
    },
    {
      "epoch": 1.8892171344165436,
      "grad_norm": 0.2510607838630676,
      "learning_rate": 7.409779529498707e-05,
      "loss": 0.2135,
      "step": 5116
    },
    {
      "epoch": 1.8895864106351552,
      "grad_norm": 0.2626101076602936,
      "learning_rate": 7.407316171942357e-05,
      "loss": 0.2145,
      "step": 5117
    },
    {
      "epoch": 1.8899556868537666,
      "grad_norm": 0.2522996962070465,
      "learning_rate": 7.404852814386009e-05,
      "loss": 0.1827,
      "step": 5118
    },
    {
      "epoch": 1.8903249630723782,
      "grad_norm": 0.2846025824546814,
      "learning_rate": 7.402389456829659e-05,
      "loss": 0.2304,
      "step": 5119
    },
    {
      "epoch": 1.8906942392909896,
      "grad_norm": 0.33159270882606506,
      "learning_rate": 7.39992609927331e-05,
      "loss": 0.2637,
      "step": 5120
    },
    {
      "epoch": 1.8910635155096012,
      "grad_norm": 0.24131393432617188,
      "learning_rate": 7.39746274171696e-05,
      "loss": 0.1861,
      "step": 5121
    },
    {
      "epoch": 1.8914327917282128,
      "grad_norm": 0.25703150033950806,
      "learning_rate": 7.394999384160612e-05,
      "loss": 0.2022,
      "step": 5122
    },
    {
      "epoch": 1.8918020679468244,
      "grad_norm": 0.29193922877311707,
      "learning_rate": 7.392536026604262e-05,
      "loss": 0.2539,
      "step": 5123
    },
    {
      "epoch": 1.8921713441654358,
      "grad_norm": 0.3627915382385254,
      "learning_rate": 7.390072669047913e-05,
      "loss": 0.2959,
      "step": 5124
    },
    {
      "epoch": 1.8925406203840471,
      "grad_norm": 0.27202221751213074,
      "learning_rate": 7.387609311491564e-05,
      "loss": 0.1999,
      "step": 5125
    },
    {
      "epoch": 1.8929098966026587,
      "grad_norm": 0.23262353241443634,
      "learning_rate": 7.385145953935214e-05,
      "loss": 0.1994,
      "step": 5126
    },
    {
      "epoch": 1.8932791728212703,
      "grad_norm": 0.27949628233909607,
      "learning_rate": 7.382682596378865e-05,
      "loss": 0.2639,
      "step": 5127
    },
    {
      "epoch": 1.893648449039882,
      "grad_norm": 0.27095603942871094,
      "learning_rate": 7.380219238822515e-05,
      "loss": 0.2037,
      "step": 5128
    },
    {
      "epoch": 1.8940177252584933,
      "grad_norm": 0.3029235601425171,
      "learning_rate": 7.377755881266167e-05,
      "loss": 0.2282,
      "step": 5129
    },
    {
      "epoch": 1.894387001477105,
      "grad_norm": 0.32053259015083313,
      "learning_rate": 7.375292523709817e-05,
      "loss": 0.1963,
      "step": 5130
    },
    {
      "epoch": 1.8947562776957163,
      "grad_norm": 0.25598132610321045,
      "learning_rate": 7.372829166153468e-05,
      "loss": 0.2195,
      "step": 5131
    },
    {
      "epoch": 1.895125553914328,
      "grad_norm": 0.27937015891075134,
      "learning_rate": 7.370365808597118e-05,
      "loss": 0.2338,
      "step": 5132
    },
    {
      "epoch": 1.8954948301329395,
      "grad_norm": 0.28756415843963623,
      "learning_rate": 7.367902451040768e-05,
      "loss": 0.2117,
      "step": 5133
    },
    {
      "epoch": 1.8958641063515511,
      "grad_norm": 0.2926604747772217,
      "learning_rate": 7.36543909348442e-05,
      "loss": 0.2589,
      "step": 5134
    },
    {
      "epoch": 1.8962333825701625,
      "grad_norm": 0.28033578395843506,
      "learning_rate": 7.36297573592807e-05,
      "loss": 0.2024,
      "step": 5135
    },
    {
      "epoch": 1.8966026587887739,
      "grad_norm": 0.2677004039287567,
      "learning_rate": 7.360512378371721e-05,
      "loss": 0.2347,
      "step": 5136
    },
    {
      "epoch": 1.8969719350073855,
      "grad_norm": 0.25729888677597046,
      "learning_rate": 7.358049020815372e-05,
      "loss": 0.2258,
      "step": 5137
    },
    {
      "epoch": 1.897341211225997,
      "grad_norm": 0.28130093216896057,
      "learning_rate": 7.355585663259023e-05,
      "loss": 0.2339,
      "step": 5138
    },
    {
      "epoch": 1.8977104874446087,
      "grad_norm": 0.31810981035232544,
      "learning_rate": 7.353122305702673e-05,
      "loss": 0.2431,
      "step": 5139
    },
    {
      "epoch": 1.89807976366322,
      "grad_norm": 0.23127999901771545,
      "learning_rate": 7.350658948146323e-05,
      "loss": 0.1878,
      "step": 5140
    },
    {
      "epoch": 1.8984490398818314,
      "grad_norm": 0.3018636405467987,
      "learning_rate": 7.348195590589975e-05,
      "loss": 0.2358,
      "step": 5141
    },
    {
      "epoch": 1.898818316100443,
      "grad_norm": 0.25260666012763977,
      "learning_rate": 7.345732233033625e-05,
      "loss": 0.2065,
      "step": 5142
    },
    {
      "epoch": 1.8991875923190547,
      "grad_norm": 0.2912799119949341,
      "learning_rate": 7.343268875477276e-05,
      "loss": 0.2322,
      "step": 5143
    },
    {
      "epoch": 1.8995568685376663,
      "grad_norm": 0.3367374539375305,
      "learning_rate": 7.340805517920926e-05,
      "loss": 0.2929,
      "step": 5144
    },
    {
      "epoch": 1.8999261447562779,
      "grad_norm": 0.3466246724128723,
      "learning_rate": 7.338342160364578e-05,
      "loss": 0.277,
      "step": 5145
    },
    {
      "epoch": 1.9002954209748892,
      "grad_norm": 0.2413496971130371,
      "learning_rate": 7.335878802808228e-05,
      "loss": 0.1826,
      "step": 5146
    },
    {
      "epoch": 1.9006646971935006,
      "grad_norm": 0.22739775478839874,
      "learning_rate": 7.33341544525188e-05,
      "loss": 0.207,
      "step": 5147
    },
    {
      "epoch": 1.9010339734121122,
      "grad_norm": 0.28335708379745483,
      "learning_rate": 7.33095208769553e-05,
      "loss": 0.2324,
      "step": 5148
    },
    {
      "epoch": 1.9014032496307238,
      "grad_norm": 0.33643093705177307,
      "learning_rate": 7.32848873013918e-05,
      "loss": 0.2339,
      "step": 5149
    },
    {
      "epoch": 1.9017725258493354,
      "grad_norm": 0.31164810061454773,
      "learning_rate": 7.326025372582831e-05,
      "loss": 0.2206,
      "step": 5150
    },
    {
      "epoch": 1.9017725258493354,
      "eval_loss": 0.25461217761039734,
      "eval_runtime": 5.854,
      "eval_samples_per_second": 8.541,
      "eval_steps_per_second": 1.196,
      "step": 5150
    },
    {
      "epoch": 1.9021418020679468,
      "grad_norm": 0.2678544521331787,
      "learning_rate": 7.323562015026481e-05,
      "loss": 0.2198,
      "step": 5151
    },
    {
      "epoch": 1.9025110782865582,
      "grad_norm": 0.35127219557762146,
      "learning_rate": 7.321098657470133e-05,
      "loss": 0.2759,
      "step": 5152
    },
    {
      "epoch": 1.9028803545051698,
      "grad_norm": 0.2919602394104004,
      "learning_rate": 7.318635299913783e-05,
      "loss": 0.2515,
      "step": 5153
    },
    {
      "epoch": 1.9032496307237814,
      "grad_norm": 0.2930256426334381,
      "learning_rate": 7.316171942357434e-05,
      "loss": 0.2082,
      "step": 5154
    },
    {
      "epoch": 1.903618906942393,
      "grad_norm": 0.3079281747341156,
      "learning_rate": 7.313708584801084e-05,
      "loss": 0.246,
      "step": 5155
    },
    {
      "epoch": 1.9039881831610044,
      "grad_norm": 0.2257537692785263,
      "learning_rate": 7.311245227244734e-05,
      "loss": 0.2006,
      "step": 5156
    },
    {
      "epoch": 1.904357459379616,
      "grad_norm": 0.2506227493286133,
      "learning_rate": 7.308781869688386e-05,
      "loss": 0.1847,
      "step": 5157
    },
    {
      "epoch": 1.9047267355982274,
      "grad_norm": 0.2114635407924652,
      "learning_rate": 7.306318512132036e-05,
      "loss": 0.1858,
      "step": 5158
    },
    {
      "epoch": 1.905096011816839,
      "grad_norm": 0.26355382800102234,
      "learning_rate": 7.303855154575688e-05,
      "loss": 0.2101,
      "step": 5159
    },
    {
      "epoch": 1.9054652880354506,
      "grad_norm": 0.293997585773468,
      "learning_rate": 7.301391797019338e-05,
      "loss": 0.2402,
      "step": 5160
    },
    {
      "epoch": 1.9058345642540622,
      "grad_norm": 0.26220446825027466,
      "learning_rate": 7.298928439462989e-05,
      "loss": 0.2145,
      "step": 5161
    },
    {
      "epoch": 1.9062038404726735,
      "grad_norm": 0.2998253405094147,
      "learning_rate": 7.296465081906639e-05,
      "loss": 0.2333,
      "step": 5162
    },
    {
      "epoch": 1.906573116691285,
      "grad_norm": 0.24114952981472015,
      "learning_rate": 7.29400172435029e-05,
      "loss": 0.2071,
      "step": 5163
    },
    {
      "epoch": 1.9069423929098965,
      "grad_norm": 0.326474130153656,
      "learning_rate": 7.291538366793941e-05,
      "loss": 0.2944,
      "step": 5164
    },
    {
      "epoch": 1.9073116691285081,
      "grad_norm": 0.2474275529384613,
      "learning_rate": 7.289075009237591e-05,
      "loss": 0.1793,
      "step": 5165
    },
    {
      "epoch": 1.9076809453471197,
      "grad_norm": 0.286654531955719,
      "learning_rate": 7.286611651681242e-05,
      "loss": 0.2206,
      "step": 5166
    },
    {
      "epoch": 1.9080502215657311,
      "grad_norm": 0.23051148653030396,
      "learning_rate": 7.284148294124892e-05,
      "loss": 0.1843,
      "step": 5167
    },
    {
      "epoch": 1.9084194977843427,
      "grad_norm": 0.31883504986763,
      "learning_rate": 7.281684936568544e-05,
      "loss": 0.2534,
      "step": 5168
    },
    {
      "epoch": 1.908788774002954,
      "grad_norm": 0.27582499384880066,
      "learning_rate": 7.279221579012194e-05,
      "loss": 0.2059,
      "step": 5169
    },
    {
      "epoch": 1.9091580502215657,
      "grad_norm": 0.2696746587753296,
      "learning_rate": 7.276758221455846e-05,
      "loss": 0.2242,
      "step": 5170
    },
    {
      "epoch": 1.9095273264401773,
      "grad_norm": 0.23835311830043793,
      "learning_rate": 7.274294863899496e-05,
      "loss": 0.1761,
      "step": 5171
    },
    {
      "epoch": 1.909896602658789,
      "grad_norm": 0.21435067057609558,
      "learning_rate": 7.271831506343146e-05,
      "loss": 0.1664,
      "step": 5172
    },
    {
      "epoch": 1.9102658788774003,
      "grad_norm": 0.3019494116306305,
      "learning_rate": 7.269368148786797e-05,
      "loss": 0.2086,
      "step": 5173
    },
    {
      "epoch": 1.9106351550960117,
      "grad_norm": 0.3723675012588501,
      "learning_rate": 7.266904791230447e-05,
      "loss": 0.2816,
      "step": 5174
    },
    {
      "epoch": 1.9110044313146233,
      "grad_norm": 0.2309865802526474,
      "learning_rate": 7.264441433674099e-05,
      "loss": 0.1917,
      "step": 5175
    },
    {
      "epoch": 1.9113737075332349,
      "grad_norm": 0.3170361816883087,
      "learning_rate": 7.261978076117749e-05,
      "loss": 0.2658,
      "step": 5176
    },
    {
      "epoch": 1.9117429837518465,
      "grad_norm": 0.29325804114341736,
      "learning_rate": 7.2595147185614e-05,
      "loss": 0.2351,
      "step": 5177
    },
    {
      "epoch": 1.9121122599704579,
      "grad_norm": 0.340961217880249,
      "learning_rate": 7.25705136100505e-05,
      "loss": 0.2204,
      "step": 5178
    },
    {
      "epoch": 1.9124815361890695,
      "grad_norm": 0.37032073736190796,
      "learning_rate": 7.254588003448702e-05,
      "loss": 0.2669,
      "step": 5179
    },
    {
      "epoch": 1.9128508124076808,
      "grad_norm": 0.27405688166618347,
      "learning_rate": 7.252124645892352e-05,
      "loss": 0.2193,
      "step": 5180
    },
    {
      "epoch": 1.9132200886262924,
      "grad_norm": 0.2763373553752899,
      "learning_rate": 7.249661288336002e-05,
      "loss": 0.1861,
      "step": 5181
    },
    {
      "epoch": 1.913589364844904,
      "grad_norm": 0.22096115350723267,
      "learning_rate": 7.247197930779654e-05,
      "loss": 0.1934,
      "step": 5182
    },
    {
      "epoch": 1.9139586410635157,
      "grad_norm": 0.2649868428707123,
      "learning_rate": 7.244734573223304e-05,
      "loss": 0.2002,
      "step": 5183
    },
    {
      "epoch": 1.914327917282127,
      "grad_norm": 0.24385647475719452,
      "learning_rate": 7.242271215666955e-05,
      "loss": 0.1991,
      "step": 5184
    },
    {
      "epoch": 1.9146971935007384,
      "grad_norm": 0.25321164727211,
      "learning_rate": 7.239807858110605e-05,
      "loss": 0.2015,
      "step": 5185
    },
    {
      "epoch": 1.91506646971935,
      "grad_norm": 0.2865865230560303,
      "learning_rate": 7.237344500554257e-05,
      "loss": 0.2198,
      "step": 5186
    },
    {
      "epoch": 1.9154357459379616,
      "grad_norm": 0.2982091009616852,
      "learning_rate": 7.234881142997907e-05,
      "loss": 0.2113,
      "step": 5187
    },
    {
      "epoch": 1.9158050221565732,
      "grad_norm": 0.37226611375808716,
      "learning_rate": 7.232417785441557e-05,
      "loss": 0.2114,
      "step": 5188
    },
    {
      "epoch": 1.9161742983751846,
      "grad_norm": 0.29692623019218445,
      "learning_rate": 7.229954427885208e-05,
      "loss": 0.2616,
      "step": 5189
    },
    {
      "epoch": 1.9165435745937962,
      "grad_norm": 0.28733372688293457,
      "learning_rate": 7.227491070328859e-05,
      "loss": 0.2408,
      "step": 5190
    },
    {
      "epoch": 1.9169128508124076,
      "grad_norm": 0.23799575865268707,
      "learning_rate": 7.22502771277251e-05,
      "loss": 0.1799,
      "step": 5191
    },
    {
      "epoch": 1.9172821270310192,
      "grad_norm": 0.20326031744480133,
      "learning_rate": 7.22256435521616e-05,
      "loss": 0.1758,
      "step": 5192
    },
    {
      "epoch": 1.9176514032496308,
      "grad_norm": 0.2550643980503082,
      "learning_rate": 7.220100997659812e-05,
      "loss": 0.2171,
      "step": 5193
    },
    {
      "epoch": 1.9180206794682424,
      "grad_norm": 0.253792941570282,
      "learning_rate": 7.217637640103462e-05,
      "loss": 0.202,
      "step": 5194
    },
    {
      "epoch": 1.9183899556868538,
      "grad_norm": 0.24402236938476562,
      "learning_rate": 7.215174282547113e-05,
      "loss": 0.1828,
      "step": 5195
    },
    {
      "epoch": 1.9187592319054652,
      "grad_norm": 0.35768184065818787,
      "learning_rate": 7.212710924990763e-05,
      "loss": 0.2377,
      "step": 5196
    },
    {
      "epoch": 1.9191285081240768,
      "grad_norm": 0.2649560868740082,
      "learning_rate": 7.210247567434413e-05,
      "loss": 0.1984,
      "step": 5197
    },
    {
      "epoch": 1.9194977843426884,
      "grad_norm": 0.25678685307502747,
      "learning_rate": 7.207784209878065e-05,
      "loss": 0.1793,
      "step": 5198
    },
    {
      "epoch": 1.9198670605613,
      "grad_norm": 0.25464650988578796,
      "learning_rate": 7.205320852321715e-05,
      "loss": 0.1932,
      "step": 5199
    },
    {
      "epoch": 1.9202363367799113,
      "grad_norm": 0.26528725028038025,
      "learning_rate": 7.202857494765366e-05,
      "loss": 0.1838,
      "step": 5200
    },
    {
      "epoch": 1.9202363367799113,
      "eval_loss": 0.25316575169563293,
      "eval_runtime": 5.8554,
      "eval_samples_per_second": 8.539,
      "eval_steps_per_second": 1.195,
      "step": 5200
    },
    {
      "epoch": 1.920605612998523,
      "grad_norm": 0.26819831132888794,
      "learning_rate": 7.200394137209016e-05,
      "loss": 0.1863,
      "step": 5201
    },
    {
      "epoch": 1.9209748892171343,
      "grad_norm": 0.3028496205806732,
      "learning_rate": 7.197930779652668e-05,
      "loss": 0.2565,
      "step": 5202
    },
    {
      "epoch": 1.921344165435746,
      "grad_norm": 0.4224024713039398,
      "learning_rate": 7.195467422096318e-05,
      "loss": 0.2971,
      "step": 5203
    },
    {
      "epoch": 1.9217134416543575,
      "grad_norm": 0.29464343190193176,
      "learning_rate": 7.193004064539968e-05,
      "loss": 0.2084,
      "step": 5204
    },
    {
      "epoch": 1.9220827178729691,
      "grad_norm": 0.2331015020608902,
      "learning_rate": 7.19054070698362e-05,
      "loss": 0.202,
      "step": 5205
    },
    {
      "epoch": 1.9224519940915805,
      "grad_norm": 0.34567561745643616,
      "learning_rate": 7.18807734942727e-05,
      "loss": 0.2981,
      "step": 5206
    },
    {
      "epoch": 1.922821270310192,
      "grad_norm": 0.26549074053764343,
      "learning_rate": 7.185613991870921e-05,
      "loss": 0.2161,
      "step": 5207
    },
    {
      "epoch": 1.9231905465288035,
      "grad_norm": 0.22987030446529388,
      "learning_rate": 7.183150634314571e-05,
      "loss": 0.1869,
      "step": 5208
    },
    {
      "epoch": 1.923559822747415,
      "grad_norm": 0.23912249505519867,
      "learning_rate": 7.180687276758223e-05,
      "loss": 0.1894,
      "step": 5209
    },
    {
      "epoch": 1.9239290989660267,
      "grad_norm": 0.2937624454498291,
      "learning_rate": 7.178223919201873e-05,
      "loss": 0.2183,
      "step": 5210
    },
    {
      "epoch": 1.924298375184638,
      "grad_norm": 0.27470824122428894,
      "learning_rate": 7.175760561645524e-05,
      "loss": 0.2037,
      "step": 5211
    },
    {
      "epoch": 1.9246676514032495,
      "grad_norm": 0.2689036428928375,
      "learning_rate": 7.173297204089174e-05,
      "loss": 0.1985,
      "step": 5212
    },
    {
      "epoch": 1.925036927621861,
      "grad_norm": 0.22076676785945892,
      "learning_rate": 7.170833846532825e-05,
      "loss": 0.1907,
      "step": 5213
    },
    {
      "epoch": 1.9254062038404727,
      "grad_norm": 0.28454598784446716,
      "learning_rate": 7.168370488976476e-05,
      "loss": 0.2264,
      "step": 5214
    },
    {
      "epoch": 1.9257754800590843,
      "grad_norm": 0.3232066035270691,
      "learning_rate": 7.165907131420126e-05,
      "loss": 0.2165,
      "step": 5215
    },
    {
      "epoch": 1.9261447562776959,
      "grad_norm": 0.2641231417655945,
      "learning_rate": 7.163443773863778e-05,
      "loss": 0.1965,
      "step": 5216
    },
    {
      "epoch": 1.9265140324963073,
      "grad_norm": 0.25479385256767273,
      "learning_rate": 7.160980416307428e-05,
      "loss": 0.2045,
      "step": 5217
    },
    {
      "epoch": 1.9268833087149186,
      "grad_norm": 0.25852954387664795,
      "learning_rate": 7.158517058751079e-05,
      "loss": 0.2051,
      "step": 5218
    },
    {
      "epoch": 1.9272525849335302,
      "grad_norm": 0.25149908661842346,
      "learning_rate": 7.156053701194729e-05,
      "loss": 0.1825,
      "step": 5219
    },
    {
      "epoch": 1.9276218611521418,
      "grad_norm": 0.2664920687675476,
      "learning_rate": 7.15359034363838e-05,
      "loss": 0.2118,
      "step": 5220
    },
    {
      "epoch": 1.9279911373707534,
      "grad_norm": 0.31141358613967896,
      "learning_rate": 7.151126986082031e-05,
      "loss": 0.2056,
      "step": 5221
    },
    {
      "epoch": 1.9283604135893648,
      "grad_norm": 0.28500014543533325,
      "learning_rate": 7.148663628525681e-05,
      "loss": 0.2746,
      "step": 5222
    },
    {
      "epoch": 1.9287296898079762,
      "grad_norm": 0.30131691694259644,
      "learning_rate": 7.146200270969332e-05,
      "loss": 0.2218,
      "step": 5223
    },
    {
      "epoch": 1.9290989660265878,
      "grad_norm": 0.2767343819141388,
      "learning_rate": 7.143736913412983e-05,
      "loss": 0.2258,
      "step": 5224
    },
    {
      "epoch": 1.9294682422451994,
      "grad_norm": 0.2807043790817261,
      "learning_rate": 7.141273555856633e-05,
      "loss": 0.2179,
      "step": 5225
    },
    {
      "epoch": 1.929837518463811,
      "grad_norm": 0.24697333574295044,
      "learning_rate": 7.138810198300283e-05,
      "loss": 0.196,
      "step": 5226
    },
    {
      "epoch": 1.9302067946824224,
      "grad_norm": 0.26040416955947876,
      "learning_rate": 7.136346840743934e-05,
      "loss": 0.195,
      "step": 5227
    },
    {
      "epoch": 1.930576070901034,
      "grad_norm": 0.2872878313064575,
      "learning_rate": 7.133883483187584e-05,
      "loss": 0.215,
      "step": 5228
    },
    {
      "epoch": 1.9309453471196454,
      "grad_norm": 0.3290000855922699,
      "learning_rate": 7.131420125631236e-05,
      "loss": 0.2409,
      "step": 5229
    },
    {
      "epoch": 1.931314623338257,
      "grad_norm": 0.2693164348602295,
      "learning_rate": 7.128956768074886e-05,
      "loss": 0.2028,
      "step": 5230
    },
    {
      "epoch": 1.9316838995568686,
      "grad_norm": 0.33382052183151245,
      "learning_rate": 7.126493410518536e-05,
      "loss": 0.2364,
      "step": 5231
    },
    {
      "epoch": 1.9320531757754802,
      "grad_norm": 0.2566787898540497,
      "learning_rate": 7.124030052962187e-05,
      "loss": 0.2047,
      "step": 5232
    },
    {
      "epoch": 1.9324224519940916,
      "grad_norm": 0.2897343635559082,
      "learning_rate": 7.121566695405838e-05,
      "loss": 0.2402,
      "step": 5233
    },
    {
      "epoch": 1.932791728212703,
      "grad_norm": 0.2652934491634369,
      "learning_rate": 7.119103337849489e-05,
      "loss": 0.2394,
      "step": 5234
    },
    {
      "epoch": 1.9331610044313146,
      "grad_norm": 0.3066202700138092,
      "learning_rate": 7.116639980293139e-05,
      "loss": 0.2273,
      "step": 5235
    },
    {
      "epoch": 1.9335302806499262,
      "grad_norm": 0.2847360074520111,
      "learning_rate": 7.11417662273679e-05,
      "loss": 0.2526,
      "step": 5236
    },
    {
      "epoch": 1.9338995568685378,
      "grad_norm": 0.2967818081378937,
      "learning_rate": 7.111713265180441e-05,
      "loss": 0.257,
      "step": 5237
    },
    {
      "epoch": 1.9342688330871491,
      "grad_norm": 0.4042901396751404,
      "learning_rate": 7.109249907624091e-05,
      "loss": 0.2662,
      "step": 5238
    },
    {
      "epoch": 1.9346381093057607,
      "grad_norm": 0.3567046821117401,
      "learning_rate": 7.106786550067742e-05,
      "loss": 0.268,
      "step": 5239
    },
    {
      "epoch": 1.9350073855243721,
      "grad_norm": 0.2794325649738312,
      "learning_rate": 7.104323192511392e-05,
      "loss": 0.2427,
      "step": 5240
    },
    {
      "epoch": 1.9353766617429837,
      "grad_norm": 0.28259146213531494,
      "learning_rate": 7.101859834955044e-05,
      "loss": 0.2398,
      "step": 5241
    },
    {
      "epoch": 1.9357459379615953,
      "grad_norm": 0.19708877801895142,
      "learning_rate": 7.099396477398694e-05,
      "loss": 0.1614,
      "step": 5242
    },
    {
      "epoch": 1.936115214180207,
      "grad_norm": 0.23787015676498413,
      "learning_rate": 7.096933119842345e-05,
      "loss": 0.1789,
      "step": 5243
    },
    {
      "epoch": 1.9364844903988183,
      "grad_norm": 0.30355992913246155,
      "learning_rate": 7.094469762285996e-05,
      "loss": 0.2219,
      "step": 5244
    },
    {
      "epoch": 1.9368537666174297,
      "grad_norm": 0.2670661509037018,
      "learning_rate": 7.092006404729647e-05,
      "loss": 0.2096,
      "step": 5245
    },
    {
      "epoch": 1.9372230428360413,
      "grad_norm": 0.29191645979881287,
      "learning_rate": 7.089543047173297e-05,
      "loss": 0.2071,
      "step": 5246
    },
    {
      "epoch": 1.937592319054653,
      "grad_norm": 0.2873486876487732,
      "learning_rate": 7.087079689616947e-05,
      "loss": 0.2219,
      "step": 5247
    },
    {
      "epoch": 1.9379615952732645,
      "grad_norm": 0.2358044534921646,
      "learning_rate": 7.084616332060599e-05,
      "loss": 0.1702,
      "step": 5248
    },
    {
      "epoch": 1.9383308714918759,
      "grad_norm": 0.2574106752872467,
      "learning_rate": 7.082152974504249e-05,
      "loss": 0.2199,
      "step": 5249
    },
    {
      "epoch": 1.9387001477104875,
      "grad_norm": 0.28576961159706116,
      "learning_rate": 7.0796896169479e-05,
      "loss": 0.2223,
      "step": 5250
    },
    {
      "epoch": 1.9387001477104875,
      "eval_loss": 0.2517310678958893,
      "eval_runtime": 5.8581,
      "eval_samples_per_second": 8.535,
      "eval_steps_per_second": 1.195,
      "step": 5250
    },
    {
      "epoch": 1.9390694239290989,
      "grad_norm": 0.23987966775894165,
      "learning_rate": 7.07722625939155e-05,
      "loss": 0.2067,
      "step": 5251
    },
    {
      "epoch": 1.9394387001477105,
      "grad_norm": 0.24387197196483612,
      "learning_rate": 7.074762901835202e-05,
      "loss": 0.2108,
      "step": 5252
    },
    {
      "epoch": 1.939807976366322,
      "grad_norm": 0.24827134609222412,
      "learning_rate": 7.072299544278852e-05,
      "loss": 0.1944,
      "step": 5253
    },
    {
      "epoch": 1.9401772525849337,
      "grad_norm": 0.2801428735256195,
      "learning_rate": 7.069836186722502e-05,
      "loss": 0.2428,
      "step": 5254
    },
    {
      "epoch": 1.940546528803545,
      "grad_norm": 0.24826756119728088,
      "learning_rate": 7.067372829166154e-05,
      "loss": 0.2212,
      "step": 5255
    },
    {
      "epoch": 1.9409158050221564,
      "grad_norm": 0.4102340340614319,
      "learning_rate": 7.064909471609804e-05,
      "loss": 0.2307,
      "step": 5256
    },
    {
      "epoch": 1.941285081240768,
      "grad_norm": 0.3000921308994293,
      "learning_rate": 7.062446114053455e-05,
      "loss": 0.2384,
      "step": 5257
    },
    {
      "epoch": 1.9416543574593796,
      "grad_norm": 0.2311098873615265,
      "learning_rate": 7.059982756497105e-05,
      "loss": 0.192,
      "step": 5258
    },
    {
      "epoch": 1.9420236336779912,
      "grad_norm": 0.28601640462875366,
      "learning_rate": 7.057519398940757e-05,
      "loss": 0.2215,
      "step": 5259
    },
    {
      "epoch": 1.9423929098966026,
      "grad_norm": 0.3139057159423828,
      "learning_rate": 7.055056041384407e-05,
      "loss": 0.2216,
      "step": 5260
    },
    {
      "epoch": 1.9427621861152142,
      "grad_norm": 0.24312689900398254,
      "learning_rate": 7.052592683828058e-05,
      "loss": 0.1908,
      "step": 5261
    },
    {
      "epoch": 1.9431314623338256,
      "grad_norm": 0.27276453375816345,
      "learning_rate": 7.050129326271708e-05,
      "loss": 0.2196,
      "step": 5262
    },
    {
      "epoch": 1.9435007385524372,
      "grad_norm": 0.3077089786529541,
      "learning_rate": 7.047665968715358e-05,
      "loss": 0.2178,
      "step": 5263
    },
    {
      "epoch": 1.9438700147710488,
      "grad_norm": 0.27808618545532227,
      "learning_rate": 7.04520261115901e-05,
      "loss": 0.2101,
      "step": 5264
    },
    {
      "epoch": 1.9442392909896604,
      "grad_norm": 0.2939068078994751,
      "learning_rate": 7.04273925360266e-05,
      "loss": 0.2634,
      "step": 5265
    },
    {
      "epoch": 1.9446085672082718,
      "grad_norm": 0.3380891978740692,
      "learning_rate": 7.040275896046312e-05,
      "loss": 0.2587,
      "step": 5266
    },
    {
      "epoch": 1.9449778434268832,
      "grad_norm": 0.25648233294487,
      "learning_rate": 7.037812538489962e-05,
      "loss": 0.1687,
      "step": 5267
    },
    {
      "epoch": 1.9453471196454948,
      "grad_norm": 0.4467228353023529,
      "learning_rate": 7.035349180933613e-05,
      "loss": 0.3074,
      "step": 5268
    },
    {
      "epoch": 1.9457163958641064,
      "grad_norm": 0.2929859161376953,
      "learning_rate": 7.032885823377263e-05,
      "loss": 0.2413,
      "step": 5269
    },
    {
      "epoch": 1.946085672082718,
      "grad_norm": 0.27283674478530884,
      "learning_rate": 7.030422465820913e-05,
      "loss": 0.232,
      "step": 5270
    },
    {
      "epoch": 1.9464549483013294,
      "grad_norm": 0.7194961905479431,
      "learning_rate": 7.027959108264565e-05,
      "loss": 0.2689,
      "step": 5271
    },
    {
      "epoch": 1.946824224519941,
      "grad_norm": 0.29518184065818787,
      "learning_rate": 7.025495750708215e-05,
      "loss": 0.2365,
      "step": 5272
    },
    {
      "epoch": 1.9471935007385524,
      "grad_norm": 0.24883228540420532,
      "learning_rate": 7.023032393151866e-05,
      "loss": 0.2224,
      "step": 5273
    },
    {
      "epoch": 1.947562776957164,
      "grad_norm": 0.29781991243362427,
      "learning_rate": 7.020569035595516e-05,
      "loss": 0.2041,
      "step": 5274
    },
    {
      "epoch": 1.9479320531757756,
      "grad_norm": 0.30253690481185913,
      "learning_rate": 7.018105678039168e-05,
      "loss": 0.2135,
      "step": 5275
    },
    {
      "epoch": 1.9483013293943872,
      "grad_norm": 0.25308331847190857,
      "learning_rate": 7.015642320482818e-05,
      "loss": 0.2281,
      "step": 5276
    },
    {
      "epoch": 1.9486706056129985,
      "grad_norm": 0.25189143419265747,
      "learning_rate": 7.01317896292647e-05,
      "loss": 0.2486,
      "step": 5277
    },
    {
      "epoch": 1.94903988183161,
      "grad_norm": 0.33361706137657166,
      "learning_rate": 7.01071560537012e-05,
      "loss": 0.2502,
      "step": 5278
    },
    {
      "epoch": 1.9494091580502215,
      "grad_norm": 0.2573055326938629,
      "learning_rate": 7.00825224781377e-05,
      "loss": 0.2221,
      "step": 5279
    },
    {
      "epoch": 1.9497784342688331,
      "grad_norm": 0.2805079221725464,
      "learning_rate": 7.005788890257421e-05,
      "loss": 0.2358,
      "step": 5280
    },
    {
      "epoch": 1.9501477104874447,
      "grad_norm": 0.2887052297592163,
      "learning_rate": 7.003325532701071e-05,
      "loss": 0.2126,
      "step": 5281
    },
    {
      "epoch": 1.950516986706056,
      "grad_norm": 0.27408942580223083,
      "learning_rate": 7.000862175144723e-05,
      "loss": 0.1986,
      "step": 5282
    },
    {
      "epoch": 1.9508862629246675,
      "grad_norm": 0.29488441348075867,
      "learning_rate": 6.998398817588373e-05,
      "loss": 0.2137,
      "step": 5283
    },
    {
      "epoch": 1.951255539143279,
      "grad_norm": 0.2893330752849579,
      "learning_rate": 6.995935460032024e-05,
      "loss": 0.2705,
      "step": 5284
    },
    {
      "epoch": 1.9516248153618907,
      "grad_norm": 0.2756158411502838,
      "learning_rate": 6.993472102475674e-05,
      "loss": 0.2014,
      "step": 5285
    },
    {
      "epoch": 1.9519940915805023,
      "grad_norm": 0.30714696645736694,
      "learning_rate": 6.991008744919325e-05,
      "loss": 0.2513,
      "step": 5286
    },
    {
      "epoch": 1.952363367799114,
      "grad_norm": 0.28137123584747314,
      "learning_rate": 6.988545387362976e-05,
      "loss": 0.1936,
      "step": 5287
    },
    {
      "epoch": 1.9527326440177253,
      "grad_norm": 0.26411962509155273,
      "learning_rate": 6.986082029806626e-05,
      "loss": 0.2098,
      "step": 5288
    },
    {
      "epoch": 1.9531019202363367,
      "grad_norm": 0.3234144449234009,
      "learning_rate": 6.983618672250278e-05,
      "loss": 0.2091,
      "step": 5289
    },
    {
      "epoch": 1.9534711964549483,
      "grad_norm": 0.2658655345439911,
      "learning_rate": 6.981155314693928e-05,
      "loss": 0.229,
      "step": 5290
    },
    {
      "epoch": 1.9538404726735599,
      "grad_norm": 0.2824770212173462,
      "learning_rate": 6.978691957137579e-05,
      "loss": 0.2324,
      "step": 5291
    },
    {
      "epoch": 1.9542097488921715,
      "grad_norm": 0.34331393241882324,
      "learning_rate": 6.976228599581229e-05,
      "loss": 0.2794,
      "step": 5292
    },
    {
      "epoch": 1.9545790251107829,
      "grad_norm": 0.38514310121536255,
      "learning_rate": 6.97376524202488e-05,
      "loss": 0.2777,
      "step": 5293
    },
    {
      "epoch": 1.9549483013293942,
      "grad_norm": 0.3110654354095459,
      "learning_rate": 6.971301884468531e-05,
      "loss": 0.231,
      "step": 5294
    },
    {
      "epoch": 1.9553175775480058,
      "grad_norm": 0.29559555649757385,
      "learning_rate": 6.968838526912181e-05,
      "loss": 0.1947,
      "step": 5295
    },
    {
      "epoch": 1.9556868537666174,
      "grad_norm": 0.20234008133411407,
      "learning_rate": 6.966375169355832e-05,
      "loss": 0.1782,
      "step": 5296
    },
    {
      "epoch": 1.956056129985229,
      "grad_norm": 0.2966025471687317,
      "learning_rate": 6.963911811799482e-05,
      "loss": 0.1906,
      "step": 5297
    },
    {
      "epoch": 1.9564254062038404,
      "grad_norm": 0.26159605383872986,
      "learning_rate": 6.961448454243134e-05,
      "loss": 0.2147,
      "step": 5298
    },
    {
      "epoch": 1.956794682422452,
      "grad_norm": 0.23278647661209106,
      "learning_rate": 6.958985096686784e-05,
      "loss": 0.1919,
      "step": 5299
    },
    {
      "epoch": 1.9571639586410634,
      "grad_norm": 0.25554853677749634,
      "learning_rate": 6.956521739130436e-05,
      "loss": 0.1835,
      "step": 5300
    },
    {
      "epoch": 1.9571639586410634,
      "eval_loss": 0.25134599208831787,
      "eval_runtime": 5.8596,
      "eval_samples_per_second": 8.533,
      "eval_steps_per_second": 1.195,
      "step": 5300
    },
    {
      "epoch": 1.957533234859675,
      "grad_norm": 0.2563522756099701,
      "learning_rate": 6.954058381574086e-05,
      "loss": 0.2164,
      "step": 5301
    },
    {
      "epoch": 1.9579025110782866,
      "grad_norm": 0.24337248504161835,
      "learning_rate": 6.951595024017736e-05,
      "loss": 0.194,
      "step": 5302
    },
    {
      "epoch": 1.9582717872968982,
      "grad_norm": 0.26176807284355164,
      "learning_rate": 6.949131666461387e-05,
      "loss": 0.1842,
      "step": 5303
    },
    {
      "epoch": 1.9586410635155096,
      "grad_norm": 0.31088557839393616,
      "learning_rate": 6.946668308905037e-05,
      "loss": 0.2423,
      "step": 5304
    },
    {
      "epoch": 1.959010339734121,
      "grad_norm": 0.3026870787143707,
      "learning_rate": 6.944204951348689e-05,
      "loss": 0.2301,
      "step": 5305
    },
    {
      "epoch": 1.9593796159527326,
      "grad_norm": 0.24130895733833313,
      "learning_rate": 6.941741593792339e-05,
      "loss": 0.1882,
      "step": 5306
    },
    {
      "epoch": 1.9597488921713442,
      "grad_norm": 0.25114375352859497,
      "learning_rate": 6.93927823623599e-05,
      "loss": 0.1871,
      "step": 5307
    },
    {
      "epoch": 1.9601181683899558,
      "grad_norm": 0.29826170206069946,
      "learning_rate": 6.93681487867964e-05,
      "loss": 0.2672,
      "step": 5308
    },
    {
      "epoch": 1.9604874446085672,
      "grad_norm": 0.2573586702346802,
      "learning_rate": 6.93435152112329e-05,
      "loss": 0.2161,
      "step": 5309
    },
    {
      "epoch": 1.9608567208271788,
      "grad_norm": 0.2709135413169861,
      "learning_rate": 6.931888163566942e-05,
      "loss": 0.2192,
      "step": 5310
    },
    {
      "epoch": 1.9612259970457901,
      "grad_norm": 0.29191112518310547,
      "learning_rate": 6.929424806010592e-05,
      "loss": 0.1997,
      "step": 5311
    },
    {
      "epoch": 1.9615952732644018,
      "grad_norm": 0.2645627558231354,
      "learning_rate": 6.926961448454244e-05,
      "loss": 0.2374,
      "step": 5312
    },
    {
      "epoch": 1.9619645494830134,
      "grad_norm": 0.29544684290885925,
      "learning_rate": 6.924498090897894e-05,
      "loss": 0.2678,
      "step": 5313
    },
    {
      "epoch": 1.962333825701625,
      "grad_norm": 0.2862391471862793,
      "learning_rate": 6.922034733341545e-05,
      "loss": 0.2529,
      "step": 5314
    },
    {
      "epoch": 1.9627031019202363,
      "grad_norm": 0.29966267943382263,
      "learning_rate": 6.919571375785195e-05,
      "loss": 0.2454,
      "step": 5315
    },
    {
      "epoch": 1.9630723781388477,
      "grad_norm": 0.27755698561668396,
      "learning_rate": 6.917108018228847e-05,
      "loss": 0.2241,
      "step": 5316
    },
    {
      "epoch": 1.9634416543574593,
      "grad_norm": 0.2564755082130432,
      "learning_rate": 6.914644660672497e-05,
      "loss": 0.2023,
      "step": 5317
    },
    {
      "epoch": 1.963810930576071,
      "grad_norm": 0.37182116508483887,
      "learning_rate": 6.912181303116147e-05,
      "loss": 0.2837,
      "step": 5318
    },
    {
      "epoch": 1.9641802067946825,
      "grad_norm": 0.2805497348308563,
      "learning_rate": 6.909717945559798e-05,
      "loss": 0.2356,
      "step": 5319
    },
    {
      "epoch": 1.964549483013294,
      "grad_norm": 0.2930833101272583,
      "learning_rate": 6.907254588003449e-05,
      "loss": 0.2019,
      "step": 5320
    },
    {
      "epoch": 1.9649187592319055,
      "grad_norm": 0.24924765527248383,
      "learning_rate": 6.9047912304471e-05,
      "loss": 0.2044,
      "step": 5321
    },
    {
      "epoch": 1.965288035450517,
      "grad_norm": 0.26734644174575806,
      "learning_rate": 6.90232787289075e-05,
      "loss": 0.2452,
      "step": 5322
    },
    {
      "epoch": 1.9656573116691285,
      "grad_norm": 0.2351471185684204,
      "learning_rate": 6.899864515334402e-05,
      "loss": 0.1839,
      "step": 5323
    },
    {
      "epoch": 1.96602658788774,
      "grad_norm": 0.24871516227722168,
      "learning_rate": 6.897401157778052e-05,
      "loss": 0.1809,
      "step": 5324
    },
    {
      "epoch": 1.9663958641063517,
      "grad_norm": 0.24512727558612823,
      "learning_rate": 6.894937800221702e-05,
      "loss": 0.1941,
      "step": 5325
    },
    {
      "epoch": 1.966765140324963,
      "grad_norm": 0.2477511316537857,
      "learning_rate": 6.892474442665353e-05,
      "loss": 0.186,
      "step": 5326
    },
    {
      "epoch": 1.9671344165435745,
      "grad_norm": 0.2817474901676178,
      "learning_rate": 6.890011085109003e-05,
      "loss": 0.2411,
      "step": 5327
    },
    {
      "epoch": 1.967503692762186,
      "grad_norm": 0.26521196961402893,
      "learning_rate": 6.887547727552655e-05,
      "loss": 0.2075,
      "step": 5328
    },
    {
      "epoch": 1.9678729689807977,
      "grad_norm": 0.2950814366340637,
      "learning_rate": 6.885084369996305e-05,
      "loss": 0.2158,
      "step": 5329
    },
    {
      "epoch": 1.9682422451994093,
      "grad_norm": 0.25968989729881287,
      "learning_rate": 6.882621012439956e-05,
      "loss": 0.2009,
      "step": 5330
    },
    {
      "epoch": 1.9686115214180206,
      "grad_norm": 0.24506819248199463,
      "learning_rate": 6.880157654883607e-05,
      "loss": 0.2173,
      "step": 5331
    },
    {
      "epoch": 1.9689807976366323,
      "grad_norm": 0.26394718885421753,
      "learning_rate": 6.877694297327258e-05,
      "loss": 0.2073,
      "step": 5332
    },
    {
      "epoch": 1.9693500738552436,
      "grad_norm": 0.3314740061759949,
      "learning_rate": 6.875230939770908e-05,
      "loss": 0.2251,
      "step": 5333
    },
    {
      "epoch": 1.9697193500738552,
      "grad_norm": 0.23417414724826813,
      "learning_rate": 6.872767582214558e-05,
      "loss": 0.187,
      "step": 5334
    },
    {
      "epoch": 1.9700886262924668,
      "grad_norm": 0.2197985053062439,
      "learning_rate": 6.87030422465821e-05,
      "loss": 0.18,
      "step": 5335
    },
    {
      "epoch": 1.9704579025110784,
      "grad_norm": 0.3090750277042389,
      "learning_rate": 6.86784086710186e-05,
      "loss": 0.2503,
      "step": 5336
    },
    {
      "epoch": 1.9708271787296898,
      "grad_norm": 0.2460378110408783,
      "learning_rate": 6.865377509545511e-05,
      "loss": 0.1782,
      "step": 5337
    },
    {
      "epoch": 1.9711964549483012,
      "grad_norm": 0.2581346929073334,
      "learning_rate": 6.862914151989161e-05,
      "loss": 0.2076,
      "step": 5338
    },
    {
      "epoch": 1.9715657311669128,
      "grad_norm": 0.31432342529296875,
      "learning_rate": 6.860450794432813e-05,
      "loss": 0.2526,
      "step": 5339
    },
    {
      "epoch": 1.9719350073855244,
      "grad_norm": 0.24623006582260132,
      "learning_rate": 6.857987436876463e-05,
      "loss": 0.2085,
      "step": 5340
    },
    {
      "epoch": 1.972304283604136,
      "grad_norm": 0.2564246356487274,
      "learning_rate": 6.855524079320113e-05,
      "loss": 0.2047,
      "step": 5341
    },
    {
      "epoch": 1.9726735598227474,
      "grad_norm": 0.2790607511997223,
      "learning_rate": 6.853060721763764e-05,
      "loss": 0.1971,
      "step": 5342
    },
    {
      "epoch": 1.9730428360413588,
      "grad_norm": 0.3743658661842346,
      "learning_rate": 6.850597364207415e-05,
      "loss": 0.2582,
      "step": 5343
    },
    {
      "epoch": 1.9734121122599704,
      "grad_norm": 0.27603679895401,
      "learning_rate": 6.848134006651066e-05,
      "loss": 0.2305,
      "step": 5344
    },
    {
      "epoch": 1.973781388478582,
      "grad_norm": 0.23608393967151642,
      "learning_rate": 6.845670649094716e-05,
      "loss": 0.1668,
      "step": 5345
    },
    {
      "epoch": 1.9741506646971936,
      "grad_norm": 0.2994793653488159,
      "learning_rate": 6.843207291538368e-05,
      "loss": 0.2404,
      "step": 5346
    },
    {
      "epoch": 1.9745199409158052,
      "grad_norm": 0.25873616337776184,
      "learning_rate": 6.840743933982018e-05,
      "loss": 0.2167,
      "step": 5347
    },
    {
      "epoch": 1.9748892171344166,
      "grad_norm": 0.24730846285820007,
      "learning_rate": 6.838280576425669e-05,
      "loss": 0.2044,
      "step": 5348
    },
    {
      "epoch": 1.975258493353028,
      "grad_norm": 0.28112494945526123,
      "learning_rate": 6.835817218869319e-05,
      "loss": 0.1811,
      "step": 5349
    },
    {
      "epoch": 1.9756277695716395,
      "grad_norm": 0.2771839499473572,
      "learning_rate": 6.83335386131297e-05,
      "loss": 0.2312,
      "step": 5350
    },
    {
      "epoch": 1.9756277695716395,
      "eval_loss": 0.2520390450954437,
      "eval_runtime": 5.8584,
      "eval_samples_per_second": 8.535,
      "eval_steps_per_second": 1.195,
      "step": 5350
    },
    {
      "epoch": 1.9759970457902511,
      "grad_norm": 0.30305880308151245,
      "learning_rate": 6.830890503756621e-05,
      "loss": 0.2449,
      "step": 5351
    },
    {
      "epoch": 1.9763663220088628,
      "grad_norm": 0.5072112083435059,
      "learning_rate": 6.828427146200271e-05,
      "loss": 0.2536,
      "step": 5352
    },
    {
      "epoch": 1.9767355982274741,
      "grad_norm": 0.3998947739601135,
      "learning_rate": 6.825963788643922e-05,
      "loss": 0.2253,
      "step": 5353
    },
    {
      "epoch": 1.9771048744460855,
      "grad_norm": 0.2706662118434906,
      "learning_rate": 6.823500431087573e-05,
      "loss": 0.1938,
      "step": 5354
    },
    {
      "epoch": 1.9774741506646971,
      "grad_norm": 0.3075130581855774,
      "learning_rate": 6.821037073531224e-05,
      "loss": 0.236,
      "step": 5355
    },
    {
      "epoch": 1.9778434268833087,
      "grad_norm": 0.24399112164974213,
      "learning_rate": 6.818573715974874e-05,
      "loss": 0.225,
      "step": 5356
    },
    {
      "epoch": 1.9782127031019203,
      "grad_norm": 0.28036928176879883,
      "learning_rate": 6.816110358418524e-05,
      "loss": 0.2248,
      "step": 5357
    },
    {
      "epoch": 1.9785819793205317,
      "grad_norm": 0.2429688572883606,
      "learning_rate": 6.813647000862176e-05,
      "loss": 0.2194,
      "step": 5358
    },
    {
      "epoch": 1.9789512555391433,
      "grad_norm": 0.2006453573703766,
      "learning_rate": 6.811183643305826e-05,
      "loss": 0.1701,
      "step": 5359
    },
    {
      "epoch": 1.9793205317577547,
      "grad_norm": 0.2574852406978607,
      "learning_rate": 6.808720285749477e-05,
      "loss": 0.1866,
      "step": 5360
    },
    {
      "epoch": 1.9796898079763663,
      "grad_norm": 0.31463634967803955,
      "learning_rate": 6.806256928193127e-05,
      "loss": 0.2167,
      "step": 5361
    },
    {
      "epoch": 1.980059084194978,
      "grad_norm": 0.26930132508277893,
      "learning_rate": 6.803793570636779e-05,
      "loss": 0.2061,
      "step": 5362
    },
    {
      "epoch": 1.9804283604135895,
      "grad_norm": 0.30395740270614624,
      "learning_rate": 6.801330213080429e-05,
      "loss": 0.2296,
      "step": 5363
    },
    {
      "epoch": 1.9807976366322009,
      "grad_norm": 0.27030232548713684,
      "learning_rate": 6.79886685552408e-05,
      "loss": 0.2207,
      "step": 5364
    },
    {
      "epoch": 1.9811669128508123,
      "grad_norm": 0.2603955566883087,
      "learning_rate": 6.79640349796773e-05,
      "loss": 0.2224,
      "step": 5365
    },
    {
      "epoch": 1.9815361890694239,
      "grad_norm": 0.2569535970687866,
      "learning_rate": 6.79394014041138e-05,
      "loss": 0.2044,
      "step": 5366
    },
    {
      "epoch": 1.9819054652880355,
      "grad_norm": 0.28096988797187805,
      "learning_rate": 6.791476782855032e-05,
      "loss": 0.2207,
      "step": 5367
    },
    {
      "epoch": 1.982274741506647,
      "grad_norm": 0.2559007704257965,
      "learning_rate": 6.789013425298682e-05,
      "loss": 0.1836,
      "step": 5368
    },
    {
      "epoch": 1.9826440177252584,
      "grad_norm": 0.2963027060031891,
      "learning_rate": 6.786550067742334e-05,
      "loss": 0.2192,
      "step": 5369
    },
    {
      "epoch": 1.98301329394387,
      "grad_norm": 0.34823083877563477,
      "learning_rate": 6.784086710185984e-05,
      "loss": 0.2689,
      "step": 5370
    },
    {
      "epoch": 1.9833825701624814,
      "grad_norm": 0.30800661444664,
      "learning_rate": 6.781623352629635e-05,
      "loss": 0.27,
      "step": 5371
    },
    {
      "epoch": 1.983751846381093,
      "grad_norm": 0.29237210750579834,
      "learning_rate": 6.779159995073285e-05,
      "loss": 0.2415,
      "step": 5372
    },
    {
      "epoch": 1.9841211225997046,
      "grad_norm": 0.25844356417655945,
      "learning_rate": 6.776696637516935e-05,
      "loss": 0.1993,
      "step": 5373
    },
    {
      "epoch": 1.9844903988183162,
      "grad_norm": 0.3420666456222534,
      "learning_rate": 6.774233279960587e-05,
      "loss": 0.2425,
      "step": 5374
    },
    {
      "epoch": 1.9848596750369276,
      "grad_norm": 0.2594550549983978,
      "learning_rate": 6.771769922404237e-05,
      "loss": 0.2281,
      "step": 5375
    },
    {
      "epoch": 1.985228951255539,
      "grad_norm": 0.2631921172142029,
      "learning_rate": 6.769306564847889e-05,
      "loss": 0.208,
      "step": 5376
    },
    {
      "epoch": 1.9855982274741506,
      "grad_norm": 0.26732879877090454,
      "learning_rate": 6.766843207291539e-05,
      "loss": 0.2159,
      "step": 5377
    },
    {
      "epoch": 1.9859675036927622,
      "grad_norm": 0.24175330996513367,
      "learning_rate": 6.76437984973519e-05,
      "loss": 0.2031,
      "step": 5378
    },
    {
      "epoch": 1.9863367799113738,
      "grad_norm": 0.2626853585243225,
      "learning_rate": 6.76191649217884e-05,
      "loss": 0.21,
      "step": 5379
    },
    {
      "epoch": 1.9867060561299852,
      "grad_norm": 0.28533270955085754,
      "learning_rate": 6.75945313462249e-05,
      "loss": 0.2124,
      "step": 5380
    },
    {
      "epoch": 1.9870753323485968,
      "grad_norm": 0.284945011138916,
      "learning_rate": 6.756989777066142e-05,
      "loss": 0.2281,
      "step": 5381
    },
    {
      "epoch": 1.9874446085672082,
      "grad_norm": 0.38396868109703064,
      "learning_rate": 6.754526419509792e-05,
      "loss": 0.2384,
      "step": 5382
    },
    {
      "epoch": 1.9878138847858198,
      "grad_norm": 0.3203868567943573,
      "learning_rate": 6.752063061953443e-05,
      "loss": 0.241,
      "step": 5383
    },
    {
      "epoch": 1.9881831610044314,
      "grad_norm": 0.32469770312309265,
      "learning_rate": 6.749599704397093e-05,
      "loss": 0.2473,
      "step": 5384
    },
    {
      "epoch": 1.988552437223043,
      "grad_norm": 0.2803540527820587,
      "learning_rate": 6.747136346840745e-05,
      "loss": 0.2066,
      "step": 5385
    },
    {
      "epoch": 1.9889217134416544,
      "grad_norm": 0.3043845593929291,
      "learning_rate": 6.744672989284395e-05,
      "loss": 0.242,
      "step": 5386
    },
    {
      "epoch": 1.9892909896602657,
      "grad_norm": 0.2872565686702728,
      "learning_rate": 6.742209631728046e-05,
      "loss": 0.2266,
      "step": 5387
    },
    {
      "epoch": 1.9896602658788773,
      "grad_norm": 0.284017413854599,
      "learning_rate": 6.739746274171697e-05,
      "loss": 0.2524,
      "step": 5388
    },
    {
      "epoch": 1.990029542097489,
      "grad_norm": 0.2788662910461426,
      "learning_rate": 6.737282916615347e-05,
      "loss": 0.1927,
      "step": 5389
    },
    {
      "epoch": 1.9903988183161005,
      "grad_norm": 0.2847667634487152,
      "learning_rate": 6.734819559058998e-05,
      "loss": 0.2155,
      "step": 5390
    },
    {
      "epoch": 1.990768094534712,
      "grad_norm": 0.3192155361175537,
      "learning_rate": 6.732356201502648e-05,
      "loss": 0.2498,
      "step": 5391
    },
    {
      "epoch": 1.9911373707533235,
      "grad_norm": 0.28015974164009094,
      "learning_rate": 6.7298928439463e-05,
      "loss": 0.2315,
      "step": 5392
    },
    {
      "epoch": 1.991506646971935,
      "grad_norm": 0.23439064621925354,
      "learning_rate": 6.72742948638995e-05,
      "loss": 0.2264,
      "step": 5393
    },
    {
      "epoch": 1.9918759231905465,
      "grad_norm": 0.2239377647638321,
      "learning_rate": 6.724966128833601e-05,
      "loss": 0.1971,
      "step": 5394
    },
    {
      "epoch": 1.9922451994091581,
      "grad_norm": 0.28067389130592346,
      "learning_rate": 6.722502771277251e-05,
      "loss": 0.2239,
      "step": 5395
    },
    {
      "epoch": 1.9926144756277697,
      "grad_norm": 0.28197333216667175,
      "learning_rate": 6.720039413720902e-05,
      "loss": 0.2223,
      "step": 5396
    },
    {
      "epoch": 1.992983751846381,
      "grad_norm": 0.25778698921203613,
      "learning_rate": 6.717576056164553e-05,
      "loss": 0.2065,
      "step": 5397
    },
    {
      "epoch": 1.9933530280649925,
      "grad_norm": 0.2842516303062439,
      "learning_rate": 6.715112698608203e-05,
      "loss": 0.2293,
      "step": 5398
    },
    {
      "epoch": 1.993722304283604,
      "grad_norm": 0.23083434998989105,
      "learning_rate": 6.712649341051855e-05,
      "loss": 0.2006,
      "step": 5399
    },
    {
      "epoch": 1.9940915805022157,
      "grad_norm": 0.2679019272327423,
      "learning_rate": 6.710185983495505e-05,
      "loss": 0.2441,
      "step": 5400
    },
    {
      "epoch": 1.9940915805022157,
      "eval_loss": 0.24856449663639069,
      "eval_runtime": 5.8555,
      "eval_samples_per_second": 8.539,
      "eval_steps_per_second": 1.195,
      "step": 5400
    },
    {
      "epoch": 1.9944608567208273,
      "grad_norm": 0.2704894542694092,
      "learning_rate": 6.707722625939156e-05,
      "loss": 0.26,
      "step": 5401
    },
    {
      "epoch": 1.9948301329394387,
      "grad_norm": 0.28078630566596985,
      "learning_rate": 6.705259268382806e-05,
      "loss": 0.2296,
      "step": 5402
    },
    {
      "epoch": 1.9951994091580503,
      "grad_norm": 0.30929094552993774,
      "learning_rate": 6.702795910826458e-05,
      "loss": 0.2217,
      "step": 5403
    },
    {
      "epoch": 1.9955686853766617,
      "grad_norm": 0.2846240699291229,
      "learning_rate": 6.700332553270108e-05,
      "loss": 0.207,
      "step": 5404
    },
    {
      "epoch": 1.9959379615952733,
      "grad_norm": 0.25282275676727295,
      "learning_rate": 6.697869195713758e-05,
      "loss": 0.1919,
      "step": 5405
    },
    {
      "epoch": 1.9963072378138849,
      "grad_norm": 0.2732691764831543,
      "learning_rate": 6.69540583815741e-05,
      "loss": 0.2137,
      "step": 5406
    },
    {
      "epoch": 1.9966765140324965,
      "grad_norm": 0.316196084022522,
      "learning_rate": 6.69294248060106e-05,
      "loss": 0.239,
      "step": 5407
    },
    {
      "epoch": 1.9970457902511078,
      "grad_norm": 0.2614864110946655,
      "learning_rate": 6.690479123044711e-05,
      "loss": 0.2092,
      "step": 5408
    },
    {
      "epoch": 1.9974150664697192,
      "grad_norm": 0.30646562576293945,
      "learning_rate": 6.688015765488361e-05,
      "loss": 0.218,
      "step": 5409
    },
    {
      "epoch": 1.9977843426883308,
      "grad_norm": 0.29279881715774536,
      "learning_rate": 6.685552407932013e-05,
      "loss": 0.2254,
      "step": 5410
    },
    {
      "epoch": 1.9981536189069424,
      "grad_norm": 0.22450609505176544,
      "learning_rate": 6.683089050375663e-05,
      "loss": 0.1983,
      "step": 5411
    },
    {
      "epoch": 1.998522895125554,
      "grad_norm": 0.2599349617958069,
      "learning_rate": 6.680625692819313e-05,
      "loss": 0.1904,
      "step": 5412
    },
    {
      "epoch": 1.9988921713441654,
      "grad_norm": 0.23740121722221375,
      "learning_rate": 6.678162335262964e-05,
      "loss": 0.2221,
      "step": 5413
    },
    {
      "epoch": 1.9992614475627768,
      "grad_norm": 0.24713720381259918,
      "learning_rate": 6.675698977706614e-05,
      "loss": 0.2003,
      "step": 5414
    },
    {
      "epoch": 1.9996307237813884,
      "grad_norm": 0.3049919903278351,
      "learning_rate": 6.673235620150266e-05,
      "loss": 0.2308,
      "step": 5415
    },
    {
      "epoch": 2.0,
      "grad_norm": 0.3003247082233429,
      "learning_rate": 6.670772262593916e-05,
      "loss": 0.2386,
      "step": 5416
    },
    {
      "epoch": 2.0003692762186116,
      "grad_norm": 0.23018822073936462,
      "learning_rate": 6.668308905037567e-05,
      "loss": 0.149,
      "step": 5417
    },
    {
      "epoch": 2.000738552437223,
      "grad_norm": 0.21735535562038422,
      "learning_rate": 6.665845547481217e-05,
      "loss": 0.1752,
      "step": 5418
    },
    {
      "epoch": 2.0011078286558344,
      "grad_norm": 0.21702949702739716,
      "learning_rate": 6.663382189924869e-05,
      "loss": 0.1806,
      "step": 5419
    },
    {
      "epoch": 2.001477104874446,
      "grad_norm": 0.22789764404296875,
      "learning_rate": 6.660918832368519e-05,
      "loss": 0.2246,
      "step": 5420
    },
    {
      "epoch": 2.0018463810930576,
      "grad_norm": 0.22062279284000397,
      "learning_rate": 6.658455474812169e-05,
      "loss": 0.1985,
      "step": 5421
    },
    {
      "epoch": 2.002215657311669,
      "grad_norm": 0.18831095099449158,
      "learning_rate": 6.65599211725582e-05,
      "loss": 0.15,
      "step": 5422
    },
    {
      "epoch": 2.0025849335302808,
      "grad_norm": 0.25476741790771484,
      "learning_rate": 6.653528759699471e-05,
      "loss": 0.1663,
      "step": 5423
    },
    {
      "epoch": 2.0029542097488924,
      "grad_norm": 0.2441297024488449,
      "learning_rate": 6.651065402143122e-05,
      "loss": 0.1603,
      "step": 5424
    },
    {
      "epoch": 2.0033234859675035,
      "grad_norm": 0.25700199604034424,
      "learning_rate": 6.648602044586772e-05,
      "loss": 0.2059,
      "step": 5425
    },
    {
      "epoch": 2.003692762186115,
      "grad_norm": 0.25105035305023193,
      "learning_rate": 6.646138687030424e-05,
      "loss": 0.1579,
      "step": 5426
    },
    {
      "epoch": 2.0040620384047267,
      "grad_norm": 0.24316802620887756,
      "learning_rate": 6.643675329474074e-05,
      "loss": 0.1654,
      "step": 5427
    },
    {
      "epoch": 2.0044313146233383,
      "grad_norm": 0.21353621780872345,
      "learning_rate": 6.641211971917724e-05,
      "loss": 0.1283,
      "step": 5428
    },
    {
      "epoch": 2.00480059084195,
      "grad_norm": 0.2258773148059845,
      "learning_rate": 6.638748614361375e-05,
      "loss": 0.158,
      "step": 5429
    },
    {
      "epoch": 2.005169867060561,
      "grad_norm": 0.22706244885921478,
      "learning_rate": 6.636285256805026e-05,
      "loss": 0.1829,
      "step": 5430
    },
    {
      "epoch": 2.0055391432791727,
      "grad_norm": 0.252373069524765,
      "learning_rate": 6.633821899248677e-05,
      "loss": 0.1573,
      "step": 5431
    },
    {
      "epoch": 2.0059084194977843,
      "grad_norm": 0.2511407136917114,
      "learning_rate": 6.631358541692327e-05,
      "loss": 0.1481,
      "step": 5432
    },
    {
      "epoch": 2.006277695716396,
      "grad_norm": 0.19744746387004852,
      "learning_rate": 6.628895184135979e-05,
      "loss": 0.1547,
      "step": 5433
    },
    {
      "epoch": 2.0066469719350075,
      "grad_norm": 0.2841181755065918,
      "learning_rate": 6.626431826579629e-05,
      "loss": 0.1925,
      "step": 5434
    },
    {
      "epoch": 2.007016248153619,
      "grad_norm": 0.260434627532959,
      "learning_rate": 6.62396846902328e-05,
      "loss": 0.1855,
      "step": 5435
    },
    {
      "epoch": 2.0073855243722303,
      "grad_norm": 0.21629783511161804,
      "learning_rate": 6.62150511146693e-05,
      "loss": 0.1538,
      "step": 5436
    },
    {
      "epoch": 2.007754800590842,
      "grad_norm": 0.25323525071144104,
      "learning_rate": 6.61904175391058e-05,
      "loss": 0.1468,
      "step": 5437
    },
    {
      "epoch": 2.0081240768094535,
      "grad_norm": 0.2551720440387726,
      "learning_rate": 6.616578396354232e-05,
      "loss": 0.18,
      "step": 5438
    },
    {
      "epoch": 2.008493353028065,
      "grad_norm": 0.30982598662376404,
      "learning_rate": 6.614115038797882e-05,
      "loss": 0.1966,
      "step": 5439
    },
    {
      "epoch": 2.0088626292466767,
      "grad_norm": 0.2552310526371002,
      "learning_rate": 6.611651681241533e-05,
      "loss": 0.1756,
      "step": 5440
    },
    {
      "epoch": 2.009231905465288,
      "grad_norm": 0.3050388991832733,
      "learning_rate": 6.609188323685184e-05,
      "loss": 0.1782,
      "step": 5441
    },
    {
      "epoch": 2.0096011816838995,
      "grad_norm": 0.28946226835250854,
      "learning_rate": 6.606724966128835e-05,
      "loss": 0.1919,
      "step": 5442
    },
    {
      "epoch": 2.009970457902511,
      "grad_norm": 0.36026933789253235,
      "learning_rate": 6.604261608572485e-05,
      "loss": 0.1804,
      "step": 5443
    },
    {
      "epoch": 2.0103397341211227,
      "grad_norm": 0.22443272173404694,
      "learning_rate": 6.601798251016135e-05,
      "loss": 0.1823,
      "step": 5444
    },
    {
      "epoch": 2.0107090103397343,
      "grad_norm": 0.24858684837818146,
      "learning_rate": 6.599334893459787e-05,
      "loss": 0.1645,
      "step": 5445
    },
    {
      "epoch": 2.011078286558346,
      "grad_norm": 0.2676042914390564,
      "learning_rate": 6.596871535903437e-05,
      "loss": 0.1724,
      "step": 5446
    },
    {
      "epoch": 2.011447562776957,
      "grad_norm": 0.28836727142333984,
      "learning_rate": 6.594408178347088e-05,
      "loss": 0.193,
      "step": 5447
    },
    {
      "epoch": 2.0118168389955686,
      "grad_norm": 0.2760215997695923,
      "learning_rate": 6.591944820790738e-05,
      "loss": 0.1856,
      "step": 5448
    },
    {
      "epoch": 2.0121861152141802,
      "grad_norm": 0.29110774397850037,
      "learning_rate": 6.58948146323439e-05,
      "loss": 0.1709,
      "step": 5449
    },
    {
      "epoch": 2.012555391432792,
      "grad_norm": 0.2904491722583771,
      "learning_rate": 6.58701810567804e-05,
      "loss": 0.1596,
      "step": 5450
    },
    {
      "epoch": 2.012555391432792,
      "eval_loss": 0.2550851106643677,
      "eval_runtime": 5.8496,
      "eval_samples_per_second": 8.548,
      "eval_steps_per_second": 1.197,
      "step": 5450
    },
    {
      "epoch": 2.0129246676514034,
      "grad_norm": 0.3089062571525574,
      "learning_rate": 6.58455474812169e-05,
      "loss": 0.167,
      "step": 5451
    },
    {
      "epoch": 2.0132939438700146,
      "grad_norm": 0.2824161648750305,
      "learning_rate": 6.582091390565341e-05,
      "loss": 0.1735,
      "step": 5452
    },
    {
      "epoch": 2.013663220088626,
      "grad_norm": 0.23326873779296875,
      "learning_rate": 6.579628033008992e-05,
      "loss": 0.1657,
      "step": 5453
    },
    {
      "epoch": 2.014032496307238,
      "grad_norm": 0.27446603775024414,
      "learning_rate": 6.577164675452643e-05,
      "loss": 0.1713,
      "step": 5454
    },
    {
      "epoch": 2.0144017725258494,
      "grad_norm": 0.3505135476589203,
      "learning_rate": 6.574701317896293e-05,
      "loss": 0.1818,
      "step": 5455
    },
    {
      "epoch": 2.014771048744461,
      "grad_norm": 0.26045656204223633,
      "learning_rate": 6.572237960339945e-05,
      "loss": 0.1604,
      "step": 5456
    },
    {
      "epoch": 2.015140324963072,
      "grad_norm": 0.33755213022232056,
      "learning_rate": 6.569774602783593e-05,
      "loss": 0.1634,
      "step": 5457
    },
    {
      "epoch": 2.0155096011816838,
      "grad_norm": 0.2607596814632416,
      "learning_rate": 6.567311245227245e-05,
      "loss": 0.181,
      "step": 5458
    },
    {
      "epoch": 2.0158788774002954,
      "grad_norm": 0.33283165097236633,
      "learning_rate": 6.564847887670895e-05,
      "loss": 0.1899,
      "step": 5459
    },
    {
      "epoch": 2.016248153618907,
      "grad_norm": 0.24610725045204163,
      "learning_rate": 6.562384530114546e-05,
      "loss": 0.1495,
      "step": 5460
    },
    {
      "epoch": 2.0166174298375186,
      "grad_norm": 0.2705758810043335,
      "learning_rate": 6.559921172558197e-05,
      "loss": 0.1493,
      "step": 5461
    },
    {
      "epoch": 2.01698670605613,
      "grad_norm": 0.2775185704231262,
      "learning_rate": 6.557457815001847e-05,
      "loss": 0.1896,
      "step": 5462
    },
    {
      "epoch": 2.0173559822747413,
      "grad_norm": 0.23754863440990448,
      "learning_rate": 6.554994457445498e-05,
      "loss": 0.1584,
      "step": 5463
    },
    {
      "epoch": 2.017725258493353,
      "grad_norm": 0.25853317975997925,
      "learning_rate": 6.552531099889148e-05,
      "loss": 0.1713,
      "step": 5464
    },
    {
      "epoch": 2.0180945347119645,
      "grad_norm": 0.2862049341201782,
      "learning_rate": 6.5500677423328e-05,
      "loss": 0.1737,
      "step": 5465
    },
    {
      "epoch": 2.018463810930576,
      "grad_norm": 0.252202570438385,
      "learning_rate": 6.54760438477645e-05,
      "loss": 0.1961,
      "step": 5466
    },
    {
      "epoch": 2.0188330871491877,
      "grad_norm": 0.3276780843734741,
      "learning_rate": 6.545141027220101e-05,
      "loss": 0.1951,
      "step": 5467
    },
    {
      "epoch": 2.019202363367799,
      "grad_norm": 0.2186700999736786,
      "learning_rate": 6.542677669663751e-05,
      "loss": 0.1542,
      "step": 5468
    },
    {
      "epoch": 2.0195716395864105,
      "grad_norm": 0.263078510761261,
      "learning_rate": 6.540214312107403e-05,
      "loss": 0.1858,
      "step": 5469
    },
    {
      "epoch": 2.019940915805022,
      "grad_norm": 0.2864452004432678,
      "learning_rate": 6.537750954551053e-05,
      "loss": 0.2138,
      "step": 5470
    },
    {
      "epoch": 2.0203101920236337,
      "grad_norm": 0.2598871886730194,
      "learning_rate": 6.535287596994703e-05,
      "loss": 0.179,
      "step": 5471
    },
    {
      "epoch": 2.0206794682422453,
      "grad_norm": 0.25144585967063904,
      "learning_rate": 6.532824239438355e-05,
      "loss": 0.1509,
      "step": 5472
    },
    {
      "epoch": 2.021048744460857,
      "grad_norm": 0.2702910304069519,
      "learning_rate": 6.530360881882005e-05,
      "loss": 0.1444,
      "step": 5473
    },
    {
      "epoch": 2.021418020679468,
      "grad_norm": 0.3086947202682495,
      "learning_rate": 6.527897524325656e-05,
      "loss": 0.1638,
      "step": 5474
    },
    {
      "epoch": 2.0217872968980797,
      "grad_norm": 0.22952225804328918,
      "learning_rate": 6.525434166769306e-05,
      "loss": 0.1438,
      "step": 5475
    },
    {
      "epoch": 2.0221565731166913,
      "grad_norm": 0.23374100029468536,
      "learning_rate": 6.522970809212958e-05,
      "loss": 0.1483,
      "step": 5476
    },
    {
      "epoch": 2.022525849335303,
      "grad_norm": 0.2832000255584717,
      "learning_rate": 6.520507451656608e-05,
      "loss": 0.1914,
      "step": 5477
    },
    {
      "epoch": 2.0228951255539145,
      "grad_norm": 0.23163892328739166,
      "learning_rate": 6.518044094100258e-05,
      "loss": 0.1499,
      "step": 5478
    },
    {
      "epoch": 2.0232644017725256,
      "grad_norm": 0.27283135056495667,
      "learning_rate": 6.51558073654391e-05,
      "loss": 0.1714,
      "step": 5479
    },
    {
      "epoch": 2.0236336779911372,
      "grad_norm": 0.20658475160598755,
      "learning_rate": 6.51311737898756e-05,
      "loss": 0.1277,
      "step": 5480
    },
    {
      "epoch": 2.024002954209749,
      "grad_norm": 0.25312045216560364,
      "learning_rate": 6.510654021431211e-05,
      "loss": 0.1585,
      "step": 5481
    },
    {
      "epoch": 2.0243722304283605,
      "grad_norm": 0.27742844820022583,
      "learning_rate": 6.508190663874861e-05,
      "loss": 0.1957,
      "step": 5482
    },
    {
      "epoch": 2.024741506646972,
      "grad_norm": 0.2213859111070633,
      "learning_rate": 6.505727306318512e-05,
      "loss": 0.1625,
      "step": 5483
    },
    {
      "epoch": 2.0251107828655837,
      "grad_norm": 0.24842393398284912,
      "learning_rate": 6.503263948762163e-05,
      "loss": 0.1702,
      "step": 5484
    },
    {
      "epoch": 2.025480059084195,
      "grad_norm": 0.3059009313583374,
      "learning_rate": 6.500800591205814e-05,
      "loss": 0.1568,
      "step": 5485
    },
    {
      "epoch": 2.0258493353028064,
      "grad_norm": 0.2236127257347107,
      "learning_rate": 6.498337233649464e-05,
      "loss": 0.1861,
      "step": 5486
    },
    {
      "epoch": 2.026218611521418,
      "grad_norm": 0.27241796255111694,
      "learning_rate": 6.495873876093114e-05,
      "loss": 0.1783,
      "step": 5487
    },
    {
      "epoch": 2.0265878877400296,
      "grad_norm": 0.27785786986351013,
      "learning_rate": 6.493410518536766e-05,
      "loss": 0.1795,
      "step": 5488
    },
    {
      "epoch": 2.0269571639586412,
      "grad_norm": 0.27707716822624207,
      "learning_rate": 6.490947160980416e-05,
      "loss": 0.1861,
      "step": 5489
    },
    {
      "epoch": 2.0273264401772524,
      "grad_norm": 0.23874297738075256,
      "learning_rate": 6.488483803424067e-05,
      "loss": 0.1639,
      "step": 5490
    },
    {
      "epoch": 2.027695716395864,
      "grad_norm": 0.30940431356430054,
      "learning_rate": 6.486020445867717e-05,
      "loss": 0.1895,
      "step": 5491
    },
    {
      "epoch": 2.0280649926144756,
      "grad_norm": 0.25607529282569885,
      "learning_rate": 6.483557088311369e-05,
      "loss": 0.1672,
      "step": 5492
    },
    {
      "epoch": 2.028434268833087,
      "grad_norm": 0.2539975345134735,
      "learning_rate": 6.481093730755019e-05,
      "loss": 0.1549,
      "step": 5493
    },
    {
      "epoch": 2.028803545051699,
      "grad_norm": 0.26156702637672424,
      "learning_rate": 6.478630373198669e-05,
      "loss": 0.1491,
      "step": 5494
    },
    {
      "epoch": 2.0291728212703104,
      "grad_norm": 0.24374203383922577,
      "learning_rate": 6.47616701564232e-05,
      "loss": 0.14,
      "step": 5495
    },
    {
      "epoch": 2.0295420974889216,
      "grad_norm": 0.2342911958694458,
      "learning_rate": 6.47370365808597e-05,
      "loss": 0.1472,
      "step": 5496
    },
    {
      "epoch": 2.029911373707533,
      "grad_norm": 0.21979846060276031,
      "learning_rate": 6.471240300529622e-05,
      "loss": 0.1593,
      "step": 5497
    },
    {
      "epoch": 2.0302806499261448,
      "grad_norm": 0.22841975092887878,
      "learning_rate": 6.468776942973272e-05,
      "loss": 0.1472,
      "step": 5498
    },
    {
      "epoch": 2.0306499261447564,
      "grad_norm": 0.31238147616386414,
      "learning_rate": 6.466313585416924e-05,
      "loss": 0.1901,
      "step": 5499
    },
    {
      "epoch": 2.031019202363368,
      "grad_norm": 0.24541234970092773,
      "learning_rate": 6.463850227860574e-05,
      "loss": 0.1618,
      "step": 5500
    },
    {
      "epoch": 2.031019202363368,
      "eval_loss": 0.2564271092414856,
      "eval_runtime": 5.8692,
      "eval_samples_per_second": 8.519,
      "eval_steps_per_second": 1.193,
      "step": 5500
    },
    {
      "epoch": 2.031388478581979,
      "grad_norm": 0.2768562138080597,
      "learning_rate": 6.461386870304225e-05,
      "loss": 0.1751,
      "step": 5501
    },
    {
      "epoch": 2.0317577548005907,
      "grad_norm": 0.2732270658016205,
      "learning_rate": 6.458923512747875e-05,
      "loss": 0.1786,
      "step": 5502
    },
    {
      "epoch": 2.0321270310192023,
      "grad_norm": 0.296970933675766,
      "learning_rate": 6.456460155191526e-05,
      "loss": 0.1761,
      "step": 5503
    },
    {
      "epoch": 2.032496307237814,
      "grad_norm": 0.2972950339317322,
      "learning_rate": 6.453996797635177e-05,
      "loss": 0.1774,
      "step": 5504
    },
    {
      "epoch": 2.0328655834564255,
      "grad_norm": 0.24320095777511597,
      "learning_rate": 6.451533440078827e-05,
      "loss": 0.1708,
      "step": 5505
    },
    {
      "epoch": 2.033234859675037,
      "grad_norm": 0.2769164443016052,
      "learning_rate": 6.449070082522479e-05,
      "loss": 0.1649,
      "step": 5506
    },
    {
      "epoch": 2.0336041358936483,
      "grad_norm": 0.2682860195636749,
      "learning_rate": 6.446606724966129e-05,
      "loss": 0.1511,
      "step": 5507
    },
    {
      "epoch": 2.03397341211226,
      "grad_norm": 0.22829866409301758,
      "learning_rate": 6.44414336740978e-05,
      "loss": 0.1419,
      "step": 5508
    },
    {
      "epoch": 2.0343426883308715,
      "grad_norm": 0.34367161989212036,
      "learning_rate": 6.44168000985343e-05,
      "loss": 0.1817,
      "step": 5509
    },
    {
      "epoch": 2.034711964549483,
      "grad_norm": 0.2511090636253357,
      "learning_rate": 6.43921665229708e-05,
      "loss": 0.1587,
      "step": 5510
    },
    {
      "epoch": 2.0350812407680947,
      "grad_norm": 0.30627578496932983,
      "learning_rate": 6.436753294740732e-05,
      "loss": 0.1792,
      "step": 5511
    },
    {
      "epoch": 2.035450516986706,
      "grad_norm": 0.26363179087638855,
      "learning_rate": 6.434289937184382e-05,
      "loss": 0.1718,
      "step": 5512
    },
    {
      "epoch": 2.0358197932053175,
      "grad_norm": 0.2435271292924881,
      "learning_rate": 6.431826579628033e-05,
      "loss": 0.162,
      "step": 5513
    },
    {
      "epoch": 2.036189069423929,
      "grad_norm": 0.23334969580173492,
      "learning_rate": 6.429363222071683e-05,
      "loss": 0.1541,
      "step": 5514
    },
    {
      "epoch": 2.0365583456425407,
      "grad_norm": 0.22303305566310883,
      "learning_rate": 6.426899864515335e-05,
      "loss": 0.1788,
      "step": 5515
    },
    {
      "epoch": 2.0369276218611523,
      "grad_norm": 0.25421905517578125,
      "learning_rate": 6.424436506958985e-05,
      "loss": 0.1942,
      "step": 5516
    },
    {
      "epoch": 2.037296898079764,
      "grad_norm": 0.21403385698795319,
      "learning_rate": 6.421973149402637e-05,
      "loss": 0.1451,
      "step": 5517
    },
    {
      "epoch": 2.037666174298375,
      "grad_norm": 0.2948196232318878,
      "learning_rate": 6.419509791846287e-05,
      "loss": 0.1988,
      "step": 5518
    },
    {
      "epoch": 2.0380354505169866,
      "grad_norm": 0.2889154553413391,
      "learning_rate": 6.417046434289937e-05,
      "loss": 0.1902,
      "step": 5519
    },
    {
      "epoch": 2.0384047267355982,
      "grad_norm": 0.2615763545036316,
      "learning_rate": 6.414583076733588e-05,
      "loss": 0.1752,
      "step": 5520
    },
    {
      "epoch": 2.03877400295421,
      "grad_norm": 0.2170265167951584,
      "learning_rate": 6.412119719177238e-05,
      "loss": 0.1562,
      "step": 5521
    },
    {
      "epoch": 2.0391432791728215,
      "grad_norm": 0.26524242758750916,
      "learning_rate": 6.40965636162089e-05,
      "loss": 0.151,
      "step": 5522
    },
    {
      "epoch": 2.0395125553914326,
      "grad_norm": 0.2578350007534027,
      "learning_rate": 6.40719300406454e-05,
      "loss": 0.1652,
      "step": 5523
    },
    {
      "epoch": 2.039881831610044,
      "grad_norm": 0.24465960264205933,
      "learning_rate": 6.404729646508191e-05,
      "loss": 0.1788,
      "step": 5524
    },
    {
      "epoch": 2.040251107828656,
      "grad_norm": 0.27955836057662964,
      "learning_rate": 6.402266288951841e-05,
      "loss": 0.1638,
      "step": 5525
    },
    {
      "epoch": 2.0406203840472674,
      "grad_norm": 0.3361818492412567,
      "learning_rate": 6.399802931395492e-05,
      "loss": 0.1654,
      "step": 5526
    },
    {
      "epoch": 2.040989660265879,
      "grad_norm": 0.3011032044887543,
      "learning_rate": 6.397339573839143e-05,
      "loss": 0.1583,
      "step": 5527
    },
    {
      "epoch": 2.04135893648449,
      "grad_norm": 0.34659239649772644,
      "learning_rate": 6.394876216282793e-05,
      "loss": 0.2047,
      "step": 5528
    },
    {
      "epoch": 2.041728212703102,
      "grad_norm": 0.24762535095214844,
      "learning_rate": 6.392412858726445e-05,
      "loss": 0.1635,
      "step": 5529
    },
    {
      "epoch": 2.0420974889217134,
      "grad_norm": 0.25023153424263,
      "learning_rate": 6.389949501170095e-05,
      "loss": 0.165,
      "step": 5530
    },
    {
      "epoch": 2.042466765140325,
      "grad_norm": 0.34106186032295227,
      "learning_rate": 6.387486143613746e-05,
      "loss": 0.19,
      "step": 5531
    },
    {
      "epoch": 2.0428360413589366,
      "grad_norm": 0.21632955968379974,
      "learning_rate": 6.385022786057396e-05,
      "loss": 0.1568,
      "step": 5532
    },
    {
      "epoch": 2.043205317577548,
      "grad_norm": 0.24720235168933868,
      "learning_rate": 6.382559428501046e-05,
      "loss": 0.1443,
      "step": 5533
    },
    {
      "epoch": 2.0435745937961594,
      "grad_norm": 0.23382613062858582,
      "learning_rate": 6.380096070944698e-05,
      "loss": 0.1661,
      "step": 5534
    },
    {
      "epoch": 2.043943870014771,
      "grad_norm": 0.277658075094223,
      "learning_rate": 6.377632713388348e-05,
      "loss": 0.1561,
      "step": 5535
    },
    {
      "epoch": 2.0443131462333826,
      "grad_norm": 0.2528332769870758,
      "learning_rate": 6.375169355832e-05,
      "loss": 0.1645,
      "step": 5536
    },
    {
      "epoch": 2.044682422451994,
      "grad_norm": 0.30018797516822815,
      "learning_rate": 6.37270599827565e-05,
      "loss": 0.1701,
      "step": 5537
    },
    {
      "epoch": 2.0450516986706058,
      "grad_norm": 0.2822677195072174,
      "learning_rate": 6.370242640719301e-05,
      "loss": 0.1809,
      "step": 5538
    },
    {
      "epoch": 2.045420974889217,
      "grad_norm": 0.2654092013835907,
      "learning_rate": 6.367779283162951e-05,
      "loss": 0.1632,
      "step": 5539
    },
    {
      "epoch": 2.0457902511078285,
      "grad_norm": 0.3735482394695282,
      "learning_rate": 6.365315925606603e-05,
      "loss": 0.1651,
      "step": 5540
    },
    {
      "epoch": 2.04615952732644,
      "grad_norm": 0.2744656205177307,
      "learning_rate": 6.362852568050253e-05,
      "loss": 0.1583,
      "step": 5541
    },
    {
      "epoch": 2.0465288035450517,
      "grad_norm": 0.275022029876709,
      "learning_rate": 6.360389210493903e-05,
      "loss": 0.1627,
      "step": 5542
    },
    {
      "epoch": 2.0468980797636633,
      "grad_norm": 0.2592197060585022,
      "learning_rate": 6.357925852937554e-05,
      "loss": 0.1931,
      "step": 5543
    },
    {
      "epoch": 2.047267355982275,
      "grad_norm": 0.27321985363960266,
      "learning_rate": 6.355462495381204e-05,
      "loss": 0.1649,
      "step": 5544
    },
    {
      "epoch": 2.047636632200886,
      "grad_norm": 0.2739521265029907,
      "learning_rate": 6.352999137824856e-05,
      "loss": 0.1475,
      "step": 5545
    },
    {
      "epoch": 2.0480059084194977,
      "grad_norm": 0.36737266182899475,
      "learning_rate": 6.350535780268506e-05,
      "loss": 0.1709,
      "step": 5546
    },
    {
      "epoch": 2.0483751846381093,
      "grad_norm": 0.22715000808238983,
      "learning_rate": 6.348072422712157e-05,
      "loss": 0.1704,
      "step": 5547
    },
    {
      "epoch": 2.048744460856721,
      "grad_norm": 0.31132203340530396,
      "learning_rate": 6.345609065155808e-05,
      "loss": 0.1977,
      "step": 5548
    },
    {
      "epoch": 2.0491137370753325,
      "grad_norm": 0.3072618842124939,
      "learning_rate": 6.343145707599458e-05,
      "loss": 0.1749,
      "step": 5549
    },
    {
      "epoch": 2.0494830132939437,
      "grad_norm": 0.2475789487361908,
      "learning_rate": 6.340682350043109e-05,
      "loss": 0.1598,
      "step": 5550
    },
    {
      "epoch": 2.0494830132939437,
      "eval_loss": 0.25815775990486145,
      "eval_runtime": 5.8602,
      "eval_samples_per_second": 8.532,
      "eval_steps_per_second": 1.194,
      "step": 5550
    },
    {
      "epoch": 2.0498522895125553,
      "grad_norm": 0.23874615132808685,
      "learning_rate": 6.338218992486759e-05,
      "loss": 0.1459,
      "step": 5551
    },
    {
      "epoch": 2.050221565731167,
      "grad_norm": 0.25430697202682495,
      "learning_rate": 6.33575563493041e-05,
      "loss": 0.192,
      "step": 5552
    },
    {
      "epoch": 2.0505908419497785,
      "grad_norm": 0.2793968915939331,
      "learning_rate": 6.333292277374061e-05,
      "loss": 0.1772,
      "step": 5553
    },
    {
      "epoch": 2.05096011816839,
      "grad_norm": 0.26261192560195923,
      "learning_rate": 6.330828919817712e-05,
      "loss": 0.1658,
      "step": 5554
    },
    {
      "epoch": 2.0513293943870017,
      "grad_norm": 0.3066626489162445,
      "learning_rate": 6.328365562261362e-05,
      "loss": 0.1926,
      "step": 5555
    },
    {
      "epoch": 2.051698670605613,
      "grad_norm": 0.2415955513715744,
      "learning_rate": 6.325902204705014e-05,
      "loss": 0.1712,
      "step": 5556
    },
    {
      "epoch": 2.0520679468242244,
      "grad_norm": 0.2773614525794983,
      "learning_rate": 6.323438847148664e-05,
      "loss": 0.1604,
      "step": 5557
    },
    {
      "epoch": 2.052437223042836,
      "grad_norm": 0.2865541875362396,
      "learning_rate": 6.320975489592314e-05,
      "loss": 0.1712,
      "step": 5558
    },
    {
      "epoch": 2.0528064992614476,
      "grad_norm": 0.351630836725235,
      "learning_rate": 6.318512132035965e-05,
      "loss": 0.1914,
      "step": 5559
    },
    {
      "epoch": 2.0531757754800593,
      "grad_norm": 0.2456756830215454,
      "learning_rate": 6.316048774479616e-05,
      "loss": 0.1666,
      "step": 5560
    },
    {
      "epoch": 2.0535450516986704,
      "grad_norm": 0.23966416716575623,
      "learning_rate": 6.313585416923267e-05,
      "loss": 0.1635,
      "step": 5561
    },
    {
      "epoch": 2.053914327917282,
      "grad_norm": 0.2779577970504761,
      "learning_rate": 6.311122059366917e-05,
      "loss": 0.158,
      "step": 5562
    },
    {
      "epoch": 2.0542836041358936,
      "grad_norm": 0.31444647908210754,
      "learning_rate": 6.308658701810569e-05,
      "loss": 0.1747,
      "step": 5563
    },
    {
      "epoch": 2.054652880354505,
      "grad_norm": 0.2886969745159149,
      "learning_rate": 6.306195344254219e-05,
      "loss": 0.185,
      "step": 5564
    },
    {
      "epoch": 2.055022156573117,
      "grad_norm": 0.2612532377243042,
      "learning_rate": 6.303731986697869e-05,
      "loss": 0.1785,
      "step": 5565
    },
    {
      "epoch": 2.0553914327917284,
      "grad_norm": 0.22525450587272644,
      "learning_rate": 6.30126862914152e-05,
      "loss": 0.1506,
      "step": 5566
    },
    {
      "epoch": 2.0557607090103396,
      "grad_norm": 0.2780967354774475,
      "learning_rate": 6.29880527158517e-05,
      "loss": 0.1709,
      "step": 5567
    },
    {
      "epoch": 2.056129985228951,
      "grad_norm": 0.29679059982299805,
      "learning_rate": 6.296341914028822e-05,
      "loss": 0.1633,
      "step": 5568
    },
    {
      "epoch": 2.056499261447563,
      "grad_norm": 0.27485817670822144,
      "learning_rate": 6.293878556472472e-05,
      "loss": 0.1782,
      "step": 5569
    },
    {
      "epoch": 2.0568685376661744,
      "grad_norm": 0.2389979511499405,
      "learning_rate": 6.291415198916123e-05,
      "loss": 0.1636,
      "step": 5570
    },
    {
      "epoch": 2.057237813884786,
      "grad_norm": 0.2602463662624359,
      "learning_rate": 6.288951841359774e-05,
      "loss": 0.1719,
      "step": 5571
    },
    {
      "epoch": 2.057607090103397,
      "grad_norm": 0.25759291648864746,
      "learning_rate": 6.286488483803425e-05,
      "loss": 0.1322,
      "step": 5572
    },
    {
      "epoch": 2.0579763663220088,
      "grad_norm": 0.24198909103870392,
      "learning_rate": 6.284025126247075e-05,
      "loss": 0.1609,
      "step": 5573
    },
    {
      "epoch": 2.0583456425406204,
      "grad_norm": 0.301605224609375,
      "learning_rate": 6.281561768690725e-05,
      "loss": 0.1893,
      "step": 5574
    },
    {
      "epoch": 2.058714918759232,
      "grad_norm": 0.3183390498161316,
      "learning_rate": 6.279098411134377e-05,
      "loss": 0.1738,
      "step": 5575
    },
    {
      "epoch": 2.0590841949778436,
      "grad_norm": 0.27507612109184265,
      "learning_rate": 6.276635053578027e-05,
      "loss": 0.176,
      "step": 5576
    },
    {
      "epoch": 2.059453471196455,
      "grad_norm": 0.2369697540998459,
      "learning_rate": 6.274171696021678e-05,
      "loss": 0.1693,
      "step": 5577
    },
    {
      "epoch": 2.0598227474150663,
      "grad_norm": 0.2782268524169922,
      "learning_rate": 6.271708338465328e-05,
      "loss": 0.1729,
      "step": 5578
    },
    {
      "epoch": 2.060192023633678,
      "grad_norm": 0.2308182567358017,
      "learning_rate": 6.26924498090898e-05,
      "loss": 0.1646,
      "step": 5579
    },
    {
      "epoch": 2.0605612998522895,
      "grad_norm": 0.30158933997154236,
      "learning_rate": 6.26678162335263e-05,
      "loss": 0.1658,
      "step": 5580
    },
    {
      "epoch": 2.060930576070901,
      "grad_norm": 0.21813644468784332,
      "learning_rate": 6.26431826579628e-05,
      "loss": 0.1644,
      "step": 5581
    },
    {
      "epoch": 2.0612998522895127,
      "grad_norm": 0.21864870190620422,
      "learning_rate": 6.261854908239932e-05,
      "loss": 0.1573,
      "step": 5582
    },
    {
      "epoch": 2.061669128508124,
      "grad_norm": 0.27184343338012695,
      "learning_rate": 6.259391550683582e-05,
      "loss": 0.1883,
      "step": 5583
    },
    {
      "epoch": 2.0620384047267355,
      "grad_norm": 0.2512473464012146,
      "learning_rate": 6.256928193127233e-05,
      "loss": 0.1885,
      "step": 5584
    },
    {
      "epoch": 2.062407680945347,
      "grad_norm": 0.25281256437301636,
      "learning_rate": 6.254464835570883e-05,
      "loss": 0.1699,
      "step": 5585
    },
    {
      "epoch": 2.0627769571639587,
      "grad_norm": 0.25659340620040894,
      "learning_rate": 6.252001478014535e-05,
      "loss": 0.179,
      "step": 5586
    },
    {
      "epoch": 2.0631462333825703,
      "grad_norm": 0.2874915897846222,
      "learning_rate": 6.249538120458185e-05,
      "loss": 0.1606,
      "step": 5587
    },
    {
      "epoch": 2.0635155096011815,
      "grad_norm": 0.24694329500198364,
      "learning_rate": 6.247074762901836e-05,
      "loss": 0.1731,
      "step": 5588
    },
    {
      "epoch": 2.063884785819793,
      "grad_norm": 0.3023461401462555,
      "learning_rate": 6.244611405345486e-05,
      "loss": 0.199,
      "step": 5589
    },
    {
      "epoch": 2.0642540620384047,
      "grad_norm": 0.2268141806125641,
      "learning_rate": 6.242148047789136e-05,
      "loss": 0.1469,
      "step": 5590
    },
    {
      "epoch": 2.0646233382570163,
      "grad_norm": 0.2839057743549347,
      "learning_rate": 6.239684690232788e-05,
      "loss": 0.2117,
      "step": 5591
    },
    {
      "epoch": 2.064992614475628,
      "grad_norm": 0.24757793545722961,
      "learning_rate": 6.237221332676438e-05,
      "loss": 0.1649,
      "step": 5592
    },
    {
      "epoch": 2.0653618906942395,
      "grad_norm": 0.2589331865310669,
      "learning_rate": 6.23475797512009e-05,
      "loss": 0.1732,
      "step": 5593
    },
    {
      "epoch": 2.0657311669128506,
      "grad_norm": 0.2764144539833069,
      "learning_rate": 6.23229461756374e-05,
      "loss": 0.1644,
      "step": 5594
    },
    {
      "epoch": 2.0661004431314622,
      "grad_norm": 0.27270591259002686,
      "learning_rate": 6.229831260007391e-05,
      "loss": 0.1705,
      "step": 5595
    },
    {
      "epoch": 2.066469719350074,
      "grad_norm": 0.2604628801345825,
      "learning_rate": 6.227367902451041e-05,
      "loss": 0.1793,
      "step": 5596
    },
    {
      "epoch": 2.0668389955686854,
      "grad_norm": 0.255330890417099,
      "learning_rate": 6.224904544894691e-05,
      "loss": 0.1455,
      "step": 5597
    },
    {
      "epoch": 2.067208271787297,
      "grad_norm": 0.23751473426818848,
      "learning_rate": 6.222441187338343e-05,
      "loss": 0.1654,
      "step": 5598
    },
    {
      "epoch": 2.067577548005908,
      "grad_norm": 0.27682632207870483,
      "learning_rate": 6.219977829781993e-05,
      "loss": 0.1736,
      "step": 5599
    },
    {
      "epoch": 2.06794682422452,
      "grad_norm": 0.24163684248924255,
      "learning_rate": 6.217514472225644e-05,
      "loss": 0.1672,
      "step": 5600
    },
    {
      "epoch": 2.06794682422452,
      "eval_loss": 0.260868102312088,
      "eval_runtime": 5.8622,
      "eval_samples_per_second": 8.529,
      "eval_steps_per_second": 1.194,
      "step": 5600
    },
    {
      "epoch": 2.0683161004431314,
      "grad_norm": 0.25598397850990295,
      "learning_rate": 6.215051114669294e-05,
      "loss": 0.1682,
      "step": 5601
    },
    {
      "epoch": 2.068685376661743,
      "grad_norm": 0.30159834027290344,
      "learning_rate": 6.212587757112946e-05,
      "loss": 0.2142,
      "step": 5602
    },
    {
      "epoch": 2.0690546528803546,
      "grad_norm": 0.25344955921173096,
      "learning_rate": 6.210124399556596e-05,
      "loss": 0.1854,
      "step": 5603
    },
    {
      "epoch": 2.069423929098966,
      "grad_norm": 0.24966850876808167,
      "learning_rate": 6.207661042000246e-05,
      "loss": 0.1933,
      "step": 5604
    },
    {
      "epoch": 2.0697932053175774,
      "grad_norm": 0.2880747616291046,
      "learning_rate": 6.205197684443898e-05,
      "loss": 0.1898,
      "step": 5605
    },
    {
      "epoch": 2.070162481536189,
      "grad_norm": 0.2810298800468445,
      "learning_rate": 6.202734326887548e-05,
      "loss": 0.1896,
      "step": 5606
    },
    {
      "epoch": 2.0705317577548006,
      "grad_norm": 0.2763424217700958,
      "learning_rate": 6.200270969331199e-05,
      "loss": 0.1501,
      "step": 5607
    },
    {
      "epoch": 2.070901033973412,
      "grad_norm": 0.36380735039711,
      "learning_rate": 6.197807611774849e-05,
      "loss": 0.1919,
      "step": 5608
    },
    {
      "epoch": 2.071270310192024,
      "grad_norm": 0.28891077637672424,
      "learning_rate": 6.195344254218501e-05,
      "loss": 0.1598,
      "step": 5609
    },
    {
      "epoch": 2.071639586410635,
      "grad_norm": 0.23979365825653076,
      "learning_rate": 6.192880896662151e-05,
      "loss": 0.1534,
      "step": 5610
    },
    {
      "epoch": 2.0720088626292466,
      "grad_norm": 0.3012255132198334,
      "learning_rate": 6.190417539105802e-05,
      "loss": 0.1798,
      "step": 5611
    },
    {
      "epoch": 2.072378138847858,
      "grad_norm": 0.27794837951660156,
      "learning_rate": 6.187954181549452e-05,
      "loss": 0.1948,
      "step": 5612
    },
    {
      "epoch": 2.0727474150664698,
      "grad_norm": 0.2556271255016327,
      "learning_rate": 6.185490823993103e-05,
      "loss": 0.1652,
      "step": 5613
    },
    {
      "epoch": 2.0731166912850814,
      "grad_norm": 0.3252602219581604,
      "learning_rate": 6.183027466436754e-05,
      "loss": 0.1782,
      "step": 5614
    },
    {
      "epoch": 2.073485967503693,
      "grad_norm": 0.32153990864753723,
      "learning_rate": 6.180564108880404e-05,
      "loss": 0.1885,
      "step": 5615
    },
    {
      "epoch": 2.073855243722304,
      "grad_norm": 0.3016199469566345,
      "learning_rate": 6.178100751324056e-05,
      "loss": 0.1746,
      "step": 5616
    },
    {
      "epoch": 2.0742245199409157,
      "grad_norm": 0.25367647409439087,
      "learning_rate": 6.175637393767706e-05,
      "loss": 0.1647,
      "step": 5617
    },
    {
      "epoch": 2.0745937961595273,
      "grad_norm": 0.34137043356895447,
      "learning_rate": 6.173174036211357e-05,
      "loss": 0.1779,
      "step": 5618
    },
    {
      "epoch": 2.074963072378139,
      "grad_norm": 0.26231876015663147,
      "learning_rate": 6.170710678655007e-05,
      "loss": 0.1695,
      "step": 5619
    },
    {
      "epoch": 2.0753323485967505,
      "grad_norm": 0.26675739884376526,
      "learning_rate": 6.168247321098657e-05,
      "loss": 0.1706,
      "step": 5620
    },
    {
      "epoch": 2.0757016248153617,
      "grad_norm": 0.3152809739112854,
      "learning_rate": 6.165783963542309e-05,
      "loss": 0.1905,
      "step": 5621
    },
    {
      "epoch": 2.0760709010339733,
      "grad_norm": 0.24944278597831726,
      "learning_rate": 6.163320605985959e-05,
      "loss": 0.1589,
      "step": 5622
    },
    {
      "epoch": 2.076440177252585,
      "grad_norm": 0.23552517592906952,
      "learning_rate": 6.16085724842961e-05,
      "loss": 0.1459,
      "step": 5623
    },
    {
      "epoch": 2.0768094534711965,
      "grad_norm": 0.26706674695014954,
      "learning_rate": 6.15839389087326e-05,
      "loss": 0.1648,
      "step": 5624
    },
    {
      "epoch": 2.077178729689808,
      "grad_norm": 0.28164857625961304,
      "learning_rate": 6.155930533316912e-05,
      "loss": 0.1594,
      "step": 5625
    },
    {
      "epoch": 2.0775480059084197,
      "grad_norm": 0.2577347457408905,
      "learning_rate": 6.153467175760562e-05,
      "loss": 0.1957,
      "step": 5626
    },
    {
      "epoch": 2.077917282127031,
      "grad_norm": 0.29985523223876953,
      "learning_rate": 6.151003818204214e-05,
      "loss": 0.2031,
      "step": 5627
    },
    {
      "epoch": 2.0782865583456425,
      "grad_norm": 0.3037504255771637,
      "learning_rate": 6.148540460647864e-05,
      "loss": 0.1784,
      "step": 5628
    },
    {
      "epoch": 2.078655834564254,
      "grad_norm": 0.2820575535297394,
      "learning_rate": 6.146077103091514e-05,
      "loss": 0.1661,
      "step": 5629
    },
    {
      "epoch": 2.0790251107828657,
      "grad_norm": 0.30463945865631104,
      "learning_rate": 6.143613745535165e-05,
      "loss": 0.2109,
      "step": 5630
    },
    {
      "epoch": 2.0793943870014773,
      "grad_norm": 0.26707136631011963,
      "learning_rate": 6.141150387978815e-05,
      "loss": 0.1838,
      "step": 5631
    },
    {
      "epoch": 2.0797636632200884,
      "grad_norm": 0.29655009508132935,
      "learning_rate": 6.138687030422467e-05,
      "loss": 0.165,
      "step": 5632
    },
    {
      "epoch": 2.0801329394387,
      "grad_norm": 0.24325284361839294,
      "learning_rate": 6.136223672866117e-05,
      "loss": 0.1723,
      "step": 5633
    },
    {
      "epoch": 2.0805022156573116,
      "grad_norm": 0.26495853066444397,
      "learning_rate": 6.133760315309768e-05,
      "loss": 0.1832,
      "step": 5634
    },
    {
      "epoch": 2.0808714918759232,
      "grad_norm": 0.21448932588100433,
      "learning_rate": 6.131296957753418e-05,
      "loss": 0.1364,
      "step": 5635
    },
    {
      "epoch": 2.081240768094535,
      "grad_norm": 0.2707633376121521,
      "learning_rate": 6.128833600197069e-05,
      "loss": 0.1593,
      "step": 5636
    },
    {
      "epoch": 2.0816100443131464,
      "grad_norm": 0.3235558271408081,
      "learning_rate": 6.12637024264072e-05,
      "loss": 0.1714,
      "step": 5637
    },
    {
      "epoch": 2.0819793205317576,
      "grad_norm": 0.24738089740276337,
      "learning_rate": 6.12390688508437e-05,
      "loss": 0.1734,
      "step": 5638
    },
    {
      "epoch": 2.082348596750369,
      "grad_norm": 0.2667107880115509,
      "learning_rate": 6.121443527528022e-05,
      "loss": 0.1825,
      "step": 5639
    },
    {
      "epoch": 2.082717872968981,
      "grad_norm": 0.307449609041214,
      "learning_rate": 6.118980169971672e-05,
      "loss": 0.1724,
      "step": 5640
    },
    {
      "epoch": 2.0830871491875924,
      "grad_norm": 0.25481271743774414,
      "learning_rate": 6.116516812415323e-05,
      "loss": 0.1765,
      "step": 5641
    },
    {
      "epoch": 2.083456425406204,
      "grad_norm": 0.22381843626499176,
      "learning_rate": 6.114053454858973e-05,
      "loss": 0.1563,
      "step": 5642
    },
    {
      "epoch": 2.083825701624815,
      "grad_norm": 0.26986509561538696,
      "learning_rate": 6.111590097302625e-05,
      "loss": 0.1697,
      "step": 5643
    },
    {
      "epoch": 2.0841949778434268,
      "grad_norm": 0.22092558443546295,
      "learning_rate": 6.109126739746275e-05,
      "loss": 0.1517,
      "step": 5644
    },
    {
      "epoch": 2.0845642540620384,
      "grad_norm": 0.29334571957588196,
      "learning_rate": 6.106663382189925e-05,
      "loss": 0.1742,
      "step": 5645
    },
    {
      "epoch": 2.08493353028065,
      "grad_norm": 0.29605746269226074,
      "learning_rate": 6.104200024633576e-05,
      "loss": 0.1656,
      "step": 5646
    },
    {
      "epoch": 2.0853028064992616,
      "grad_norm": 0.28364232182502747,
      "learning_rate": 6.101736667077227e-05,
      "loss": 0.1706,
      "step": 5647
    },
    {
      "epoch": 2.085672082717873,
      "grad_norm": 0.21887627243995667,
      "learning_rate": 6.099273309520877e-05,
      "loss": 0.1366,
      "step": 5648
    },
    {
      "epoch": 2.0860413589364843,
      "grad_norm": 0.2478788197040558,
      "learning_rate": 6.096809951964528e-05,
      "loss": 0.1793,
      "step": 5649
    },
    {
      "epoch": 2.086410635155096,
      "grad_norm": 0.27544811367988586,
      "learning_rate": 6.094346594408179e-05,
      "loss": 0.2005,
      "step": 5650
    },
    {
      "epoch": 2.086410635155096,
      "eval_loss": 0.25888141989707947,
      "eval_runtime": 5.8564,
      "eval_samples_per_second": 8.538,
      "eval_steps_per_second": 1.195,
      "step": 5650
    },
    {
      "epoch": 2.0867799113737076,
      "grad_norm": 0.24575506150722504,
      "learning_rate": 6.09188323685183e-05,
      "loss": 0.1667,
      "step": 5651
    },
    {
      "epoch": 2.087149187592319,
      "grad_norm": 0.236062154173851,
      "learning_rate": 6.0894198792954805e-05,
      "loss": 0.1763,
      "step": 5652
    },
    {
      "epoch": 2.0875184638109308,
      "grad_norm": 0.245724618434906,
      "learning_rate": 6.086956521739131e-05,
      "loss": 0.165,
      "step": 5653
    },
    {
      "epoch": 2.087887740029542,
      "grad_norm": 0.22567129135131836,
      "learning_rate": 6.084493164182782e-05,
      "loss": 0.1586,
      "step": 5654
    },
    {
      "epoch": 2.0882570162481535,
      "grad_norm": 0.2369643896818161,
      "learning_rate": 6.082029806626433e-05,
      "loss": 0.1555,
      "step": 5655
    },
    {
      "epoch": 2.088626292466765,
      "grad_norm": 0.2373889833688736,
      "learning_rate": 6.079566449070083e-05,
      "loss": 0.1789,
      "step": 5656
    },
    {
      "epoch": 2.0889955686853767,
      "grad_norm": 0.2397778481245041,
      "learning_rate": 6.077103091513734e-05,
      "loss": 0.1715,
      "step": 5657
    },
    {
      "epoch": 2.0893648449039883,
      "grad_norm": 0.28299954533576965,
      "learning_rate": 6.0746397339573845e-05,
      "loss": 0.182,
      "step": 5658
    },
    {
      "epoch": 2.0897341211226,
      "grad_norm": 0.26077738404273987,
      "learning_rate": 6.072176376401035e-05,
      "loss": 0.1925,
      "step": 5659
    },
    {
      "epoch": 2.090103397341211,
      "grad_norm": 0.26217538118362427,
      "learning_rate": 6.069713018844686e-05,
      "loss": 0.1806,
      "step": 5660
    },
    {
      "epoch": 2.0904726735598227,
      "grad_norm": 0.2796938717365265,
      "learning_rate": 6.067249661288337e-05,
      "loss": 0.176,
      "step": 5661
    },
    {
      "epoch": 2.0908419497784343,
      "grad_norm": 0.2654780149459839,
      "learning_rate": 6.0647863037319877e-05,
      "loss": 0.1719,
      "step": 5662
    },
    {
      "epoch": 2.091211225997046,
      "grad_norm": 0.35504603385925293,
      "learning_rate": 6.0623229461756384e-05,
      "loss": 0.1886,
      "step": 5663
    },
    {
      "epoch": 2.0915805022156575,
      "grad_norm": 0.2887597680091858,
      "learning_rate": 6.0598595886192886e-05,
      "loss": 0.1772,
      "step": 5664
    },
    {
      "epoch": 2.0919497784342687,
      "grad_norm": 0.22546793520450592,
      "learning_rate": 6.057396231062939e-05,
      "loss": 0.1572,
      "step": 5665
    },
    {
      "epoch": 2.0923190546528803,
      "grad_norm": 0.29904258251190186,
      "learning_rate": 6.05493287350659e-05,
      "loss": 0.2133,
      "step": 5666
    },
    {
      "epoch": 2.092688330871492,
      "grad_norm": 0.24293597042560577,
      "learning_rate": 6.052469515950241e-05,
      "loss": 0.1823,
      "step": 5667
    },
    {
      "epoch": 2.0930576070901035,
      "grad_norm": 0.24957305192947388,
      "learning_rate": 6.050006158393892e-05,
      "loss": 0.1585,
      "step": 5668
    },
    {
      "epoch": 2.093426883308715,
      "grad_norm": 0.28926122188568115,
      "learning_rate": 6.0475428008375425e-05,
      "loss": 0.1833,
      "step": 5669
    },
    {
      "epoch": 2.0937961595273262,
      "grad_norm": 0.2231256365776062,
      "learning_rate": 6.045079443281193e-05,
      "loss": 0.1712,
      "step": 5670
    },
    {
      "epoch": 2.094165435745938,
      "grad_norm": 0.22925357520580292,
      "learning_rate": 6.042616085724844e-05,
      "loss": 0.166,
      "step": 5671
    },
    {
      "epoch": 2.0945347119645494,
      "grad_norm": 0.2851234972476959,
      "learning_rate": 6.040152728168494e-05,
      "loss": 0.1831,
      "step": 5672
    },
    {
      "epoch": 2.094903988183161,
      "grad_norm": 0.28612107038497925,
      "learning_rate": 6.037689370612145e-05,
      "loss": 0.1596,
      "step": 5673
    },
    {
      "epoch": 2.0952732644017726,
      "grad_norm": 0.26201173663139343,
      "learning_rate": 6.035226013055796e-05,
      "loss": 0.1571,
      "step": 5674
    },
    {
      "epoch": 2.0956425406203842,
      "grad_norm": 0.3124045431613922,
      "learning_rate": 6.0327626554994465e-05,
      "loss": 0.1723,
      "step": 5675
    },
    {
      "epoch": 2.0960118168389954,
      "grad_norm": 0.33962711691856384,
      "learning_rate": 6.030299297943097e-05,
      "loss": 0.1974,
      "step": 5676
    },
    {
      "epoch": 2.096381093057607,
      "grad_norm": 0.28157955408096313,
      "learning_rate": 6.027835940386748e-05,
      "loss": 0.1745,
      "step": 5677
    },
    {
      "epoch": 2.0967503692762186,
      "grad_norm": 0.27035462856292725,
      "learning_rate": 6.025372582830399e-05,
      "loss": 0.161,
      "step": 5678
    },
    {
      "epoch": 2.09711964549483,
      "grad_norm": 0.26044461131095886,
      "learning_rate": 6.022909225274049e-05,
      "loss": 0.1587,
      "step": 5679
    },
    {
      "epoch": 2.097488921713442,
      "grad_norm": 0.25345587730407715,
      "learning_rate": 6.0204458677177e-05,
      "loss": 0.1576,
      "step": 5680
    },
    {
      "epoch": 2.097858197932053,
      "grad_norm": 0.3048918843269348,
      "learning_rate": 6.0179825101613506e-05,
      "loss": 0.1781,
      "step": 5681
    },
    {
      "epoch": 2.0982274741506646,
      "grad_norm": 0.29959362745285034,
      "learning_rate": 6.0155191526050014e-05,
      "loss": 0.1477,
      "step": 5682
    },
    {
      "epoch": 2.098596750369276,
      "grad_norm": 0.38074469566345215,
      "learning_rate": 6.013055795048652e-05,
      "loss": 0.1791,
      "step": 5683
    },
    {
      "epoch": 2.098966026587888,
      "grad_norm": 0.278363436460495,
      "learning_rate": 6.010592437492303e-05,
      "loss": 0.1654,
      "step": 5684
    },
    {
      "epoch": 2.0993353028064994,
      "grad_norm": 0.28001198172569275,
      "learning_rate": 6.008129079935954e-05,
      "loss": 0.1688,
      "step": 5685
    },
    {
      "epoch": 2.099704579025111,
      "grad_norm": 0.32029715180397034,
      "learning_rate": 6.0056657223796045e-05,
      "loss": 0.1629,
      "step": 5686
    },
    {
      "epoch": 2.100073855243722,
      "grad_norm": 0.2780267298221588,
      "learning_rate": 6.0032023648232546e-05,
      "loss": 0.1759,
      "step": 5687
    },
    {
      "epoch": 2.1004431314623337,
      "grad_norm": 0.2489921599626541,
      "learning_rate": 6.0007390072669054e-05,
      "loss": 0.1575,
      "step": 5688
    },
    {
      "epoch": 2.1008124076809453,
      "grad_norm": 0.2580353915691376,
      "learning_rate": 5.9982756497105555e-05,
      "loss": 0.1509,
      "step": 5689
    },
    {
      "epoch": 2.101181683899557,
      "grad_norm": 0.24058429896831512,
      "learning_rate": 5.9958122921542056e-05,
      "loss": 0.1673,
      "step": 5690
    },
    {
      "epoch": 2.1015509601181686,
      "grad_norm": 0.2796524167060852,
      "learning_rate": 5.9933489345978564e-05,
      "loss": 0.1593,
      "step": 5691
    },
    {
      "epoch": 2.1019202363367797,
      "grad_norm": 0.2787950336933136,
      "learning_rate": 5.990885577041507e-05,
      "loss": 0.1819,
      "step": 5692
    },
    {
      "epoch": 2.1022895125553913,
      "grad_norm": 0.2514388859272003,
      "learning_rate": 5.988422219485158e-05,
      "loss": 0.1902,
      "step": 5693
    },
    {
      "epoch": 2.102658788774003,
      "grad_norm": 0.2770799696445465,
      "learning_rate": 5.985958861928809e-05,
      "loss": 0.1737,
      "step": 5694
    },
    {
      "epoch": 2.1030280649926145,
      "grad_norm": 0.2315564602613449,
      "learning_rate": 5.9834955043724595e-05,
      "loss": 0.1452,
      "step": 5695
    },
    {
      "epoch": 2.103397341211226,
      "grad_norm": 0.29391729831695557,
      "learning_rate": 5.98103214681611e-05,
      "loss": 0.2115,
      "step": 5696
    },
    {
      "epoch": 2.1037666174298377,
      "grad_norm": 0.2725535035133362,
      "learning_rate": 5.978568789259761e-05,
      "loss": 0.1654,
      "step": 5697
    },
    {
      "epoch": 2.104135893648449,
      "grad_norm": 0.24598264694213867,
      "learning_rate": 5.976105431703411e-05,
      "loss": 0.1526,
      "step": 5698
    },
    {
      "epoch": 2.1045051698670605,
      "grad_norm": 0.28126004338264465,
      "learning_rate": 5.973642074147062e-05,
      "loss": 0.1583,
      "step": 5699
    },
    {
      "epoch": 2.104874446085672,
      "grad_norm": 0.3185833692550659,
      "learning_rate": 5.971178716590713e-05,
      "loss": 0.2104,
      "step": 5700
    },
    {
      "epoch": 2.104874446085672,
      "eval_loss": 0.26025980710983276,
      "eval_runtime": 5.8698,
      "eval_samples_per_second": 8.518,
      "eval_steps_per_second": 1.193,
      "step": 5700
    },
    {
      "epoch": 2.1052437223042837,
      "grad_norm": 0.25908419489860535,
      "learning_rate": 5.9687153590343636e-05,
      "loss": 0.1657,
      "step": 5701
    },
    {
      "epoch": 2.1056129985228953,
      "grad_norm": 0.2654663324356079,
      "learning_rate": 5.9662520014780144e-05,
      "loss": 0.1687,
      "step": 5702
    },
    {
      "epoch": 2.1059822747415065,
      "grad_norm": 0.2636914849281311,
      "learning_rate": 5.963788643921665e-05,
      "loss": 0.1869,
      "step": 5703
    },
    {
      "epoch": 2.106351550960118,
      "grad_norm": 0.3741796314716339,
      "learning_rate": 5.961325286365316e-05,
      "loss": 0.2066,
      "step": 5704
    },
    {
      "epoch": 2.1067208271787297,
      "grad_norm": 0.2857508659362793,
      "learning_rate": 5.958861928808967e-05,
      "loss": 0.2045,
      "step": 5705
    },
    {
      "epoch": 2.1070901033973413,
      "grad_norm": 0.30921655893325806,
      "learning_rate": 5.956398571252617e-05,
      "loss": 0.1772,
      "step": 5706
    },
    {
      "epoch": 2.107459379615953,
      "grad_norm": 0.2541576027870178,
      "learning_rate": 5.9539352136962676e-05,
      "loss": 0.1938,
      "step": 5707
    },
    {
      "epoch": 2.1078286558345645,
      "grad_norm": 0.2691689133644104,
      "learning_rate": 5.9514718561399184e-05,
      "loss": 0.1625,
      "step": 5708
    },
    {
      "epoch": 2.1081979320531756,
      "grad_norm": 0.2552073895931244,
      "learning_rate": 5.949008498583569e-05,
      "loss": 0.1543,
      "step": 5709
    },
    {
      "epoch": 2.1085672082717872,
      "grad_norm": 0.2687050700187683,
      "learning_rate": 5.94654514102722e-05,
      "loss": 0.1543,
      "step": 5710
    },
    {
      "epoch": 2.108936484490399,
      "grad_norm": 0.29799342155456543,
      "learning_rate": 5.944081783470871e-05,
      "loss": 0.1869,
      "step": 5711
    },
    {
      "epoch": 2.1093057607090104,
      "grad_norm": 0.2936353087425232,
      "learning_rate": 5.9416184259145216e-05,
      "loss": 0.1764,
      "step": 5712
    },
    {
      "epoch": 2.109675036927622,
      "grad_norm": 0.28229206800460815,
      "learning_rate": 5.939155068358172e-05,
      "loss": 0.1837,
      "step": 5713
    },
    {
      "epoch": 2.110044313146233,
      "grad_norm": 0.3060760498046875,
      "learning_rate": 5.9366917108018224e-05,
      "loss": 0.1811,
      "step": 5714
    },
    {
      "epoch": 2.110413589364845,
      "grad_norm": 0.26369336247444153,
      "learning_rate": 5.934228353245473e-05,
      "loss": 0.1574,
      "step": 5715
    },
    {
      "epoch": 2.1107828655834564,
      "grad_norm": 0.26808932423591614,
      "learning_rate": 5.931764995689124e-05,
      "loss": 0.1754,
      "step": 5716
    },
    {
      "epoch": 2.111152141802068,
      "grad_norm": 0.28503137826919556,
      "learning_rate": 5.929301638132775e-05,
      "loss": 0.176,
      "step": 5717
    },
    {
      "epoch": 2.1115214180206796,
      "grad_norm": 0.22209247946739197,
      "learning_rate": 5.9268382805764256e-05,
      "loss": 0.1708,
      "step": 5718
    },
    {
      "epoch": 2.1118906942392908,
      "grad_norm": 0.25102144479751587,
      "learning_rate": 5.9243749230200764e-05,
      "loss": 0.1665,
      "step": 5719
    },
    {
      "epoch": 2.1122599704579024,
      "grad_norm": 0.24346455931663513,
      "learning_rate": 5.921911565463727e-05,
      "loss": 0.1555,
      "step": 5720
    },
    {
      "epoch": 2.112629246676514,
      "grad_norm": 0.2438468486070633,
      "learning_rate": 5.919448207907378e-05,
      "loss": 0.153,
      "step": 5721
    },
    {
      "epoch": 2.1129985228951256,
      "grad_norm": 0.2507217228412628,
      "learning_rate": 5.916984850351028e-05,
      "loss": 0.1757,
      "step": 5722
    },
    {
      "epoch": 2.113367799113737,
      "grad_norm": 0.23745280504226685,
      "learning_rate": 5.914521492794679e-05,
      "loss": 0.1485,
      "step": 5723
    },
    {
      "epoch": 2.113737075332349,
      "grad_norm": 0.30220702290534973,
      "learning_rate": 5.9120581352383296e-05,
      "loss": 0.1832,
      "step": 5724
    },
    {
      "epoch": 2.11410635155096,
      "grad_norm": 0.28973954916000366,
      "learning_rate": 5.9095947776819804e-05,
      "loss": 0.1827,
      "step": 5725
    },
    {
      "epoch": 2.1144756277695715,
      "grad_norm": 0.23914393782615662,
      "learning_rate": 5.907131420125631e-05,
      "loss": 0.1597,
      "step": 5726
    },
    {
      "epoch": 2.114844903988183,
      "grad_norm": 0.33213070034980774,
      "learning_rate": 5.904668062569282e-05,
      "loss": 0.1586,
      "step": 5727
    },
    {
      "epoch": 2.1152141802067947,
      "grad_norm": 0.25139182806015015,
      "learning_rate": 5.902204705012933e-05,
      "loss": 0.1668,
      "step": 5728
    },
    {
      "epoch": 2.1155834564254064,
      "grad_norm": 0.19673173129558563,
      "learning_rate": 5.8997413474565836e-05,
      "loss": 0.1407,
      "step": 5729
    },
    {
      "epoch": 2.1159527326440175,
      "grad_norm": 0.25255340337753296,
      "learning_rate": 5.897277989900234e-05,
      "loss": 0.1603,
      "step": 5730
    },
    {
      "epoch": 2.116322008862629,
      "grad_norm": 0.22645212709903717,
      "learning_rate": 5.8948146323438845e-05,
      "loss": 0.1504,
      "step": 5731
    },
    {
      "epoch": 2.1166912850812407,
      "grad_norm": 0.2959424555301666,
      "learning_rate": 5.892351274787535e-05,
      "loss": 0.1824,
      "step": 5732
    },
    {
      "epoch": 2.1170605612998523,
      "grad_norm": 0.3307090103626251,
      "learning_rate": 5.889887917231186e-05,
      "loss": 0.1943,
      "step": 5733
    },
    {
      "epoch": 2.117429837518464,
      "grad_norm": 0.31114888191223145,
      "learning_rate": 5.887424559674837e-05,
      "loss": 0.1661,
      "step": 5734
    },
    {
      "epoch": 2.1177991137370755,
      "grad_norm": 0.28659382462501526,
      "learning_rate": 5.8849612021184876e-05,
      "loss": 0.1703,
      "step": 5735
    },
    {
      "epoch": 2.1181683899556867,
      "grad_norm": 0.25932013988494873,
      "learning_rate": 5.8824978445621384e-05,
      "loss": 0.1631,
      "step": 5736
    },
    {
      "epoch": 2.1185376661742983,
      "grad_norm": 0.27428874373435974,
      "learning_rate": 5.880034487005789e-05,
      "loss": 0.1679,
      "step": 5737
    },
    {
      "epoch": 2.11890694239291,
      "grad_norm": 0.2641359269618988,
      "learning_rate": 5.877571129449439e-05,
      "loss": 0.1571,
      "step": 5738
    },
    {
      "epoch": 2.1192762186115215,
      "grad_norm": 0.23932605981826782,
      "learning_rate": 5.87510777189309e-05,
      "loss": 0.1538,
      "step": 5739
    },
    {
      "epoch": 2.119645494830133,
      "grad_norm": 0.3590959906578064,
      "learning_rate": 5.872644414336741e-05,
      "loss": 0.1934,
      "step": 5740
    },
    {
      "epoch": 2.1200147710487443,
      "grad_norm": 0.23727695643901825,
      "learning_rate": 5.8701810567803916e-05,
      "loss": 0.1663,
      "step": 5741
    },
    {
      "epoch": 2.120384047267356,
      "grad_norm": 0.2921147048473358,
      "learning_rate": 5.8677176992240424e-05,
      "loss": 0.1569,
      "step": 5742
    },
    {
      "epoch": 2.1207533234859675,
      "grad_norm": 0.2539224326610565,
      "learning_rate": 5.865254341667693e-05,
      "loss": 0.148,
      "step": 5743
    },
    {
      "epoch": 2.121122599704579,
      "grad_norm": 0.29284903407096863,
      "learning_rate": 5.862790984111344e-05,
      "loss": 0.1742,
      "step": 5744
    },
    {
      "epoch": 2.1214918759231907,
      "grad_norm": 0.2959270477294922,
      "learning_rate": 5.860327626554995e-05,
      "loss": 0.1724,
      "step": 5745
    },
    {
      "epoch": 2.1218611521418023,
      "grad_norm": 0.28707221150398254,
      "learning_rate": 5.857864268998645e-05,
      "loss": 0.1881,
      "step": 5746
    },
    {
      "epoch": 2.1222304283604134,
      "grad_norm": 0.2809945046901703,
      "learning_rate": 5.855400911442296e-05,
      "loss": 0.1885,
      "step": 5747
    },
    {
      "epoch": 2.122599704579025,
      "grad_norm": 0.2609975337982178,
      "learning_rate": 5.8529375538859465e-05,
      "loss": 0.1779,
      "step": 5748
    },
    {
      "epoch": 2.1229689807976366,
      "grad_norm": 0.25174084305763245,
      "learning_rate": 5.850474196329597e-05,
      "loss": 0.1698,
      "step": 5749
    },
    {
      "epoch": 2.1233382570162482,
      "grad_norm": 0.260440468788147,
      "learning_rate": 5.848010838773248e-05,
      "loss": 0.1646,
      "step": 5750
    },
    {
      "epoch": 2.1233382570162482,
      "eval_loss": 0.2585288882255554,
      "eval_runtime": 5.8601,
      "eval_samples_per_second": 8.532,
      "eval_steps_per_second": 1.195,
      "step": 5750
    },
    {
      "epoch": 2.12370753323486,
      "grad_norm": 0.27335822582244873,
      "learning_rate": 5.845547481216899e-05,
      "loss": 0.1782,
      "step": 5751
    },
    {
      "epoch": 2.124076809453471,
      "grad_norm": 0.2632400393486023,
      "learning_rate": 5.8430841236605496e-05,
      "loss": 0.1629,
      "step": 5752
    },
    {
      "epoch": 2.1244460856720826,
      "grad_norm": 0.24932514131069183,
      "learning_rate": 5.8406207661042e-05,
      "loss": 0.1559,
      "step": 5753
    },
    {
      "epoch": 2.124815361890694,
      "grad_norm": 0.28199252486228943,
      "learning_rate": 5.8381574085478505e-05,
      "loss": 0.1625,
      "step": 5754
    },
    {
      "epoch": 2.125184638109306,
      "grad_norm": 0.22602632641792297,
      "learning_rate": 5.835694050991501e-05,
      "loss": 0.1477,
      "step": 5755
    },
    {
      "epoch": 2.1255539143279174,
      "grad_norm": 0.24132585525512695,
      "learning_rate": 5.833230693435152e-05,
      "loss": 0.1722,
      "step": 5756
    },
    {
      "epoch": 2.125923190546529,
      "grad_norm": 0.2727733850479126,
      "learning_rate": 5.830767335878803e-05,
      "loss": 0.217,
      "step": 5757
    },
    {
      "epoch": 2.12629246676514,
      "grad_norm": 0.24890835583209991,
      "learning_rate": 5.8283039783224537e-05,
      "loss": 0.1613,
      "step": 5758
    },
    {
      "epoch": 2.1266617429837518,
      "grad_norm": 0.26370131969451904,
      "learning_rate": 5.8258406207661044e-05,
      "loss": 0.1726,
      "step": 5759
    },
    {
      "epoch": 2.1270310192023634,
      "grad_norm": 0.28788357973098755,
      "learning_rate": 5.823377263209755e-05,
      "loss": 0.2016,
      "step": 5760
    },
    {
      "epoch": 2.127400295420975,
      "grad_norm": 0.27368101477622986,
      "learning_rate": 5.8209139056534053e-05,
      "loss": 0.1735,
      "step": 5761
    },
    {
      "epoch": 2.1277695716395866,
      "grad_norm": 0.24496977031230927,
      "learning_rate": 5.818450548097056e-05,
      "loss": 0.1596,
      "step": 5762
    },
    {
      "epoch": 2.1281388478581977,
      "grad_norm": 0.25704407691955566,
      "learning_rate": 5.815987190540707e-05,
      "loss": 0.1717,
      "step": 5763
    },
    {
      "epoch": 2.1285081240768093,
      "grad_norm": 0.23020388185977936,
      "learning_rate": 5.813523832984358e-05,
      "loss": 0.1651,
      "step": 5764
    },
    {
      "epoch": 2.128877400295421,
      "grad_norm": 0.3059319257736206,
      "learning_rate": 5.8110604754280085e-05,
      "loss": 0.1856,
      "step": 5765
    },
    {
      "epoch": 2.1292466765140325,
      "grad_norm": 0.24818597733974457,
      "learning_rate": 5.808597117871659e-05,
      "loss": 0.183,
      "step": 5766
    },
    {
      "epoch": 2.129615952732644,
      "grad_norm": 0.2819104492664337,
      "learning_rate": 5.80613376031531e-05,
      "loss": 0.1699,
      "step": 5767
    },
    {
      "epoch": 2.1299852289512557,
      "grad_norm": 0.24581509828567505,
      "learning_rate": 5.803670402758961e-05,
      "loss": 0.1971,
      "step": 5768
    },
    {
      "epoch": 2.130354505169867,
      "grad_norm": 0.30326130986213684,
      "learning_rate": 5.801207045202611e-05,
      "loss": 0.2084,
      "step": 5769
    },
    {
      "epoch": 2.1307237813884785,
      "grad_norm": 0.2676447331905365,
      "learning_rate": 5.798743687646262e-05,
      "loss": 0.1895,
      "step": 5770
    },
    {
      "epoch": 2.13109305760709,
      "grad_norm": 0.23784242570400238,
      "learning_rate": 5.7962803300899125e-05,
      "loss": 0.1491,
      "step": 5771
    },
    {
      "epoch": 2.1314623338257017,
      "grad_norm": 0.2932337522506714,
      "learning_rate": 5.793816972533563e-05,
      "loss": 0.1595,
      "step": 5772
    },
    {
      "epoch": 2.1318316100443133,
      "grad_norm": 0.22179383039474487,
      "learning_rate": 5.791353614977214e-05,
      "loss": 0.1439,
      "step": 5773
    },
    {
      "epoch": 2.1322008862629245,
      "grad_norm": 0.3005230128765106,
      "learning_rate": 5.788890257420865e-05,
      "loss": 0.1892,
      "step": 5774
    },
    {
      "epoch": 2.132570162481536,
      "grad_norm": 0.2520924210548401,
      "learning_rate": 5.786426899864516e-05,
      "loss": 0.1728,
      "step": 5775
    },
    {
      "epoch": 2.1329394387001477,
      "grad_norm": 0.31041574478149414,
      "learning_rate": 5.7839635423081665e-05,
      "loss": 0.1919,
      "step": 5776
    },
    {
      "epoch": 2.1333087149187593,
      "grad_norm": 0.20861373841762543,
      "learning_rate": 5.7815001847518166e-05,
      "loss": 0.1528,
      "step": 5777
    },
    {
      "epoch": 2.133677991137371,
      "grad_norm": 0.2767115831375122,
      "learning_rate": 5.7790368271954674e-05,
      "loss": 0.1721,
      "step": 5778
    },
    {
      "epoch": 2.1340472673559825,
      "grad_norm": 0.30877935886383057,
      "learning_rate": 5.776573469639118e-05,
      "loss": 0.1567,
      "step": 5779
    },
    {
      "epoch": 2.1344165435745936,
      "grad_norm": 0.2880702614784241,
      "learning_rate": 5.774110112082769e-05,
      "loss": 0.1788,
      "step": 5780
    },
    {
      "epoch": 2.1347858197932053,
      "grad_norm": 0.30518677830696106,
      "learning_rate": 5.77164675452642e-05,
      "loss": 0.1861,
      "step": 5781
    },
    {
      "epoch": 2.135155096011817,
      "grad_norm": 0.2239362746477127,
      "learning_rate": 5.7691833969700705e-05,
      "loss": 0.1443,
      "step": 5782
    },
    {
      "epoch": 2.1355243722304285,
      "grad_norm": 0.2619790732860565,
      "learning_rate": 5.766720039413721e-05,
      "loss": 0.1874,
      "step": 5783
    },
    {
      "epoch": 2.13589364844904,
      "grad_norm": 0.2446897327899933,
      "learning_rate": 5.764256681857372e-05,
      "loss": 0.16,
      "step": 5784
    },
    {
      "epoch": 2.136262924667651,
      "grad_norm": 0.2709537148475647,
      "learning_rate": 5.761793324301022e-05,
      "loss": 0.1859,
      "step": 5785
    },
    {
      "epoch": 2.136632200886263,
      "grad_norm": 0.20826995372772217,
      "learning_rate": 5.759329966744673e-05,
      "loss": 0.1308,
      "step": 5786
    },
    {
      "epoch": 2.1370014771048744,
      "grad_norm": 0.31698474287986755,
      "learning_rate": 5.756866609188324e-05,
      "loss": 0.192,
      "step": 5787
    },
    {
      "epoch": 2.137370753323486,
      "grad_norm": 0.23542068898677826,
      "learning_rate": 5.7544032516319745e-05,
      "loss": 0.1529,
      "step": 5788
    },
    {
      "epoch": 2.1377400295420976,
      "grad_norm": 0.2614317834377289,
      "learning_rate": 5.751939894075625e-05,
      "loss": 0.183,
      "step": 5789
    },
    {
      "epoch": 2.1381093057607092,
      "grad_norm": 0.2492826133966446,
      "learning_rate": 5.749476536519276e-05,
      "loss": 0.1744,
      "step": 5790
    },
    {
      "epoch": 2.1384785819793204,
      "grad_norm": 0.2894151210784912,
      "learning_rate": 5.747013178962927e-05,
      "loss": 0.1757,
      "step": 5791
    },
    {
      "epoch": 2.138847858197932,
      "grad_norm": 0.2812666594982147,
      "learning_rate": 5.744549821406578e-05,
      "loss": 0.1722,
      "step": 5792
    },
    {
      "epoch": 2.1392171344165436,
      "grad_norm": 0.2668617069721222,
      "learning_rate": 5.742086463850228e-05,
      "loss": 0.1902,
      "step": 5793
    },
    {
      "epoch": 2.139586410635155,
      "grad_norm": 0.2633315324783325,
      "learning_rate": 5.7396231062938786e-05,
      "loss": 0.1762,
      "step": 5794
    },
    {
      "epoch": 2.139955686853767,
      "grad_norm": 0.3152179718017578,
      "learning_rate": 5.7371597487375294e-05,
      "loss": 0.173,
      "step": 5795
    },
    {
      "epoch": 2.140324963072378,
      "grad_norm": 0.26436901092529297,
      "learning_rate": 5.73469639118118e-05,
      "loss": 0.1651,
      "step": 5796
    },
    {
      "epoch": 2.1406942392909896,
      "grad_norm": 0.239777609705925,
      "learning_rate": 5.732233033624831e-05,
      "loss": 0.151,
      "step": 5797
    },
    {
      "epoch": 2.141063515509601,
      "grad_norm": 0.2569291889667511,
      "learning_rate": 5.729769676068482e-05,
      "loss": 0.1573,
      "step": 5798
    },
    {
      "epoch": 2.1414327917282128,
      "grad_norm": 0.2817891538143158,
      "learning_rate": 5.7273063185121325e-05,
      "loss": 0.1643,
      "step": 5799
    },
    {
      "epoch": 2.1418020679468244,
      "grad_norm": 0.2648109793663025,
      "learning_rate": 5.724842960955783e-05,
      "loss": 0.1826,
      "step": 5800
    },
    {
      "epoch": 2.1418020679468244,
      "eval_loss": 0.25734588503837585,
      "eval_runtime": 5.8587,
      "eval_samples_per_second": 8.534,
      "eval_steps_per_second": 1.195,
      "step": 5800
    },
    {
      "epoch": 2.142171344165436,
      "grad_norm": 0.2397744059562683,
      "learning_rate": 5.7223796033994334e-05,
      "loss": 0.1669,
      "step": 5801
    },
    {
      "epoch": 2.142540620384047,
      "grad_norm": 0.2661373019218445,
      "learning_rate": 5.719916245843084e-05,
      "loss": 0.1676,
      "step": 5802
    },
    {
      "epoch": 2.1429098966026587,
      "grad_norm": 0.21583642065525055,
      "learning_rate": 5.717452888286735e-05,
      "loss": 0.156,
      "step": 5803
    },
    {
      "epoch": 2.1432791728212703,
      "grad_norm": 0.23864522576332092,
      "learning_rate": 5.714989530730386e-05,
      "loss": 0.154,
      "step": 5804
    },
    {
      "epoch": 2.143648449039882,
      "grad_norm": 0.23954026401042938,
      "learning_rate": 5.7125261731740365e-05,
      "loss": 0.1631,
      "step": 5805
    },
    {
      "epoch": 2.1440177252584935,
      "grad_norm": 0.2587621510028839,
      "learning_rate": 5.710062815617687e-05,
      "loss": 0.1845,
      "step": 5806
    },
    {
      "epoch": 2.1443870014771047,
      "grad_norm": 0.34114527702331543,
      "learning_rate": 5.707599458061338e-05,
      "loss": 0.1747,
      "step": 5807
    },
    {
      "epoch": 2.1447562776957163,
      "grad_norm": 0.25066620111465454,
      "learning_rate": 5.705136100504989e-05,
      "loss": 0.1615,
      "step": 5808
    },
    {
      "epoch": 2.145125553914328,
      "grad_norm": 0.3193507492542267,
      "learning_rate": 5.702672742948639e-05,
      "loss": 0.1563,
      "step": 5809
    },
    {
      "epoch": 2.1454948301329395,
      "grad_norm": 0.2463066577911377,
      "learning_rate": 5.70020938539229e-05,
      "loss": 0.1512,
      "step": 5810
    },
    {
      "epoch": 2.145864106351551,
      "grad_norm": 0.26367413997650146,
      "learning_rate": 5.6977460278359406e-05,
      "loss": 0.1619,
      "step": 5811
    },
    {
      "epoch": 2.1462333825701623,
      "grad_norm": 0.22284801304340363,
      "learning_rate": 5.6952826702795914e-05,
      "loss": 0.1516,
      "step": 5812
    },
    {
      "epoch": 2.146602658788774,
      "grad_norm": 0.2699580788612366,
      "learning_rate": 5.692819312723242e-05,
      "loss": 0.1846,
      "step": 5813
    },
    {
      "epoch": 2.1469719350073855,
      "grad_norm": 0.332487553358078,
      "learning_rate": 5.690355955166893e-05,
      "loss": 0.1928,
      "step": 5814
    },
    {
      "epoch": 2.147341211225997,
      "grad_norm": 0.27749398350715637,
      "learning_rate": 5.687892597610544e-05,
      "loss": 0.1653,
      "step": 5815
    },
    {
      "epoch": 2.1477104874446087,
      "grad_norm": 0.22983166575431824,
      "learning_rate": 5.6854292400541945e-05,
      "loss": 0.139,
      "step": 5816
    },
    {
      "epoch": 2.1480797636632203,
      "grad_norm": 0.2825463116168976,
      "learning_rate": 5.6829658824978446e-05,
      "loss": 0.1898,
      "step": 5817
    },
    {
      "epoch": 2.1484490398818314,
      "grad_norm": 0.2563636302947998,
      "learning_rate": 5.6805025249414954e-05,
      "loss": 0.1649,
      "step": 5818
    },
    {
      "epoch": 2.148818316100443,
      "grad_norm": 0.24772876501083374,
      "learning_rate": 5.678039167385146e-05,
      "loss": 0.1769,
      "step": 5819
    },
    {
      "epoch": 2.1491875923190547,
      "grad_norm": 0.24235022068023682,
      "learning_rate": 5.675575809828797e-05,
      "loss": 0.1478,
      "step": 5820
    },
    {
      "epoch": 2.1495568685376663,
      "grad_norm": 0.24281422793865204,
      "learning_rate": 5.673112452272448e-05,
      "loss": 0.1721,
      "step": 5821
    },
    {
      "epoch": 2.149926144756278,
      "grad_norm": 0.1965787410736084,
      "learning_rate": 5.6706490947160986e-05,
      "loss": 0.155,
      "step": 5822
    },
    {
      "epoch": 2.150295420974889,
      "grad_norm": 0.2670396566390991,
      "learning_rate": 5.6681857371597493e-05,
      "loss": 0.1879,
      "step": 5823
    },
    {
      "epoch": 2.1506646971935006,
      "grad_norm": 0.2415950447320938,
      "learning_rate": 5.6657223796034e-05,
      "loss": 0.1527,
      "step": 5824
    },
    {
      "epoch": 2.151033973412112,
      "grad_norm": 0.23829121887683868,
      "learning_rate": 5.66325902204705e-05,
      "loss": 0.1563,
      "step": 5825
    },
    {
      "epoch": 2.151403249630724,
      "grad_norm": 0.27524328231811523,
      "learning_rate": 5.660795664490701e-05,
      "loss": 0.1787,
      "step": 5826
    },
    {
      "epoch": 2.1517725258493354,
      "grad_norm": 0.27747201919555664,
      "learning_rate": 5.658332306934352e-05,
      "loss": 0.1871,
      "step": 5827
    },
    {
      "epoch": 2.152141802067947,
      "grad_norm": 0.3109496533870697,
      "learning_rate": 5.6558689493780026e-05,
      "loss": 0.1947,
      "step": 5828
    },
    {
      "epoch": 2.152511078286558,
      "grad_norm": 0.29694050550460815,
      "learning_rate": 5.6534055918216534e-05,
      "loss": 0.1835,
      "step": 5829
    },
    {
      "epoch": 2.15288035450517,
      "grad_norm": 0.3042897582054138,
      "learning_rate": 5.650942234265304e-05,
      "loss": 0.1931,
      "step": 5830
    },
    {
      "epoch": 2.1532496307237814,
      "grad_norm": 0.3608880639076233,
      "learning_rate": 5.648478876708955e-05,
      "loss": 0.1923,
      "step": 5831
    },
    {
      "epoch": 2.153618906942393,
      "grad_norm": 0.2922372817993164,
      "learning_rate": 5.646015519152605e-05,
      "loss": 0.1953,
      "step": 5832
    },
    {
      "epoch": 2.1539881831610046,
      "grad_norm": 0.23728720843791962,
      "learning_rate": 5.643552161596256e-05,
      "loss": 0.1562,
      "step": 5833
    },
    {
      "epoch": 2.1543574593796158,
      "grad_norm": 0.24144093692302704,
      "learning_rate": 5.6410888040399066e-05,
      "loss": 0.1655,
      "step": 5834
    },
    {
      "epoch": 2.1547267355982274,
      "grad_norm": 0.2800363600254059,
      "learning_rate": 5.6386254464835574e-05,
      "loss": 0.1776,
      "step": 5835
    },
    {
      "epoch": 2.155096011816839,
      "grad_norm": 0.3141731917858124,
      "learning_rate": 5.636162088927208e-05,
      "loss": 0.1852,
      "step": 5836
    },
    {
      "epoch": 2.1554652880354506,
      "grad_norm": 0.284499853849411,
      "learning_rate": 5.633698731370859e-05,
      "loss": 0.1824,
      "step": 5837
    },
    {
      "epoch": 2.155834564254062,
      "grad_norm": 0.29822319746017456,
      "learning_rate": 5.63123537381451e-05,
      "loss": 0.167,
      "step": 5838
    },
    {
      "epoch": 2.1562038404726733,
      "grad_norm": 0.30732518434524536,
      "learning_rate": 5.6287720162581606e-05,
      "loss": 0.1819,
      "step": 5839
    },
    {
      "epoch": 2.156573116691285,
      "grad_norm": 0.2756134867668152,
      "learning_rate": 5.626308658701811e-05,
      "loss": 0.1527,
      "step": 5840
    },
    {
      "epoch": 2.1569423929098965,
      "grad_norm": 0.27252382040023804,
      "learning_rate": 5.6238453011454615e-05,
      "loss": 0.1897,
      "step": 5841
    },
    {
      "epoch": 2.157311669128508,
      "grad_norm": 0.27324724197387695,
      "learning_rate": 5.621381943589112e-05,
      "loss": 0.1805,
      "step": 5842
    },
    {
      "epoch": 2.1576809453471197,
      "grad_norm": 0.2699751555919647,
      "learning_rate": 5.618918586032763e-05,
      "loss": 0.1842,
      "step": 5843
    },
    {
      "epoch": 2.1580502215657313,
      "grad_norm": 0.22370193898677826,
      "learning_rate": 5.616455228476414e-05,
      "loss": 0.1603,
      "step": 5844
    },
    {
      "epoch": 2.1584194977843425,
      "grad_norm": 0.272770494222641,
      "learning_rate": 5.6139918709200646e-05,
      "loss": 0.1885,
      "step": 5845
    },
    {
      "epoch": 2.158788774002954,
      "grad_norm": 0.30952098965644836,
      "learning_rate": 5.6115285133637154e-05,
      "loss": 0.1865,
      "step": 5846
    },
    {
      "epoch": 2.1591580502215657,
      "grad_norm": 0.23765036463737488,
      "learning_rate": 5.609065155807366e-05,
      "loss": 0.1527,
      "step": 5847
    },
    {
      "epoch": 2.1595273264401773,
      "grad_norm": 0.3192552328109741,
      "learning_rate": 5.606601798251016e-05,
      "loss": 0.1782,
      "step": 5848
    },
    {
      "epoch": 2.159896602658789,
      "grad_norm": 0.25643131136894226,
      "learning_rate": 5.604138440694667e-05,
      "loss": 0.1742,
      "step": 5849
    },
    {
      "epoch": 2.1602658788774,
      "grad_norm": 0.25116267800331116,
      "learning_rate": 5.601675083138318e-05,
      "loss": 0.1644,
      "step": 5850
    },
    {
      "epoch": 2.1602658788774,
      "eval_loss": 0.25662660598754883,
      "eval_runtime": 5.8517,
      "eval_samples_per_second": 8.545,
      "eval_steps_per_second": 1.196,
      "step": 5850
    },
    {
      "epoch": 2.1606351550960117,
      "grad_norm": 0.23183858394622803,
      "learning_rate": 5.5992117255819687e-05,
      "loss": 0.1669,
      "step": 5851
    },
    {
      "epoch": 2.1610044313146233,
      "grad_norm": 0.23837564885616302,
      "learning_rate": 5.5967483680256194e-05,
      "loss": 0.1608,
      "step": 5852
    },
    {
      "epoch": 2.161373707533235,
      "grad_norm": 0.2505916655063629,
      "learning_rate": 5.59428501046927e-05,
      "loss": 0.1722,
      "step": 5853
    },
    {
      "epoch": 2.1617429837518465,
      "grad_norm": 0.21763832867145538,
      "learning_rate": 5.591821652912921e-05,
      "loss": 0.1438,
      "step": 5854
    },
    {
      "epoch": 2.162112259970458,
      "grad_norm": 0.2366868257522583,
      "learning_rate": 5.589358295356572e-05,
      "loss": 0.169,
      "step": 5855
    },
    {
      "epoch": 2.1624815361890692,
      "grad_norm": 0.23833197355270386,
      "learning_rate": 5.586894937800222e-05,
      "loss": 0.1854,
      "step": 5856
    },
    {
      "epoch": 2.162850812407681,
      "grad_norm": 0.31813618540763855,
      "learning_rate": 5.584431580243873e-05,
      "loss": 0.1752,
      "step": 5857
    },
    {
      "epoch": 2.1632200886262924,
      "grad_norm": 0.27261093258857727,
      "learning_rate": 5.5819682226875235e-05,
      "loss": 0.1701,
      "step": 5858
    },
    {
      "epoch": 2.163589364844904,
      "grad_norm": 0.31021595001220703,
      "learning_rate": 5.579504865131174e-05,
      "loss": 0.1774,
      "step": 5859
    },
    {
      "epoch": 2.1639586410635157,
      "grad_norm": 0.2569142282009125,
      "learning_rate": 5.577041507574825e-05,
      "loss": 0.1765,
      "step": 5860
    },
    {
      "epoch": 2.164327917282127,
      "grad_norm": 0.24459010362625122,
      "learning_rate": 5.574578150018476e-05,
      "loss": 0.1658,
      "step": 5861
    },
    {
      "epoch": 2.1646971935007384,
      "grad_norm": 0.27214524149894714,
      "learning_rate": 5.5721147924621266e-05,
      "loss": 0.1574,
      "step": 5862
    },
    {
      "epoch": 2.16506646971935,
      "grad_norm": 0.3365628719329834,
      "learning_rate": 5.5696514349057774e-05,
      "loss": 0.1833,
      "step": 5863
    },
    {
      "epoch": 2.1654357459379616,
      "grad_norm": 0.24673442542552948,
      "learning_rate": 5.5671880773494275e-05,
      "loss": 0.1724,
      "step": 5864
    },
    {
      "epoch": 2.1658050221565732,
      "grad_norm": 0.28301650285720825,
      "learning_rate": 5.564724719793078e-05,
      "loss": 0.1741,
      "step": 5865
    },
    {
      "epoch": 2.166174298375185,
      "grad_norm": 0.25149691104888916,
      "learning_rate": 5.562261362236729e-05,
      "loss": 0.1668,
      "step": 5866
    },
    {
      "epoch": 2.166543574593796,
      "grad_norm": 0.2798976004123688,
      "learning_rate": 5.55979800468038e-05,
      "loss": 0.1699,
      "step": 5867
    },
    {
      "epoch": 2.1669128508124076,
      "grad_norm": 0.31019899249076843,
      "learning_rate": 5.557334647124031e-05,
      "loss": 0.183,
      "step": 5868
    },
    {
      "epoch": 2.167282127031019,
      "grad_norm": 0.3291718363761902,
      "learning_rate": 5.5548712895676815e-05,
      "loss": 0.1629,
      "step": 5869
    },
    {
      "epoch": 2.167651403249631,
      "grad_norm": 0.23700349032878876,
      "learning_rate": 5.552407932011332e-05,
      "loss": 0.1634,
      "step": 5870
    },
    {
      "epoch": 2.1680206794682424,
      "grad_norm": 0.29189571738243103,
      "learning_rate": 5.549944574454983e-05,
      "loss": 0.1852,
      "step": 5871
    },
    {
      "epoch": 2.1683899556868536,
      "grad_norm": 0.23060524463653564,
      "learning_rate": 5.547481216898633e-05,
      "loss": 0.1377,
      "step": 5872
    },
    {
      "epoch": 2.168759231905465,
      "grad_norm": 0.2611374258995056,
      "learning_rate": 5.545017859342284e-05,
      "loss": 0.1765,
      "step": 5873
    },
    {
      "epoch": 2.1691285081240768,
      "grad_norm": 0.2506032884120941,
      "learning_rate": 5.542554501785935e-05,
      "loss": 0.1471,
      "step": 5874
    },
    {
      "epoch": 2.1694977843426884,
      "grad_norm": 0.280585378408432,
      "learning_rate": 5.5400911442295855e-05,
      "loss": 0.158,
      "step": 5875
    },
    {
      "epoch": 2.1698670605613,
      "grad_norm": 0.27997133135795593,
      "learning_rate": 5.537627786673236e-05,
      "loss": 0.1544,
      "step": 5876
    },
    {
      "epoch": 2.1702363367799116,
      "grad_norm": 0.2693052589893341,
      "learning_rate": 5.535164429116887e-05,
      "loss": 0.1932,
      "step": 5877
    },
    {
      "epoch": 2.1706056129985227,
      "grad_norm": 0.2868502736091614,
      "learning_rate": 5.532701071560538e-05,
      "loss": 0.1742,
      "step": 5878
    },
    {
      "epoch": 2.1709748892171343,
      "grad_norm": 0.2899225652217865,
      "learning_rate": 5.5302377140041886e-05,
      "loss": 0.2052,
      "step": 5879
    },
    {
      "epoch": 2.171344165435746,
      "grad_norm": 0.2309410274028778,
      "learning_rate": 5.527774356447839e-05,
      "loss": 0.1585,
      "step": 5880
    },
    {
      "epoch": 2.1717134416543575,
      "grad_norm": 0.2919321060180664,
      "learning_rate": 5.5253109988914895e-05,
      "loss": 0.1513,
      "step": 5881
    },
    {
      "epoch": 2.172082717872969,
      "grad_norm": 0.27619990706443787,
      "learning_rate": 5.52284764133514e-05,
      "loss": 0.1836,
      "step": 5882
    },
    {
      "epoch": 2.1724519940915803,
      "grad_norm": 0.21038885414600372,
      "learning_rate": 5.520384283778791e-05,
      "loss": 0.1528,
      "step": 5883
    },
    {
      "epoch": 2.172821270310192,
      "grad_norm": 0.26871126890182495,
      "learning_rate": 5.517920926222442e-05,
      "loss": 0.1784,
      "step": 5884
    },
    {
      "epoch": 2.1731905465288035,
      "grad_norm": 0.2990822196006775,
      "learning_rate": 5.515457568666093e-05,
      "loss": 0.1697,
      "step": 5885
    },
    {
      "epoch": 2.173559822747415,
      "grad_norm": 0.27839839458465576,
      "learning_rate": 5.5129942111097435e-05,
      "loss": 0.18,
      "step": 5886
    },
    {
      "epoch": 2.1739290989660267,
      "grad_norm": 0.30338016152381897,
      "learning_rate": 5.510530853553394e-05,
      "loss": 0.2025,
      "step": 5887
    },
    {
      "epoch": 2.1742983751846383,
      "grad_norm": 0.25437456369400024,
      "learning_rate": 5.5080674959970444e-05,
      "loss": 0.1733,
      "step": 5888
    },
    {
      "epoch": 2.1746676514032495,
      "grad_norm": 0.26489147543907166,
      "learning_rate": 5.505604138440695e-05,
      "loss": 0.1791,
      "step": 5889
    },
    {
      "epoch": 2.175036927621861,
      "grad_norm": 0.24482989311218262,
      "learning_rate": 5.503140780884346e-05,
      "loss": 0.1542,
      "step": 5890
    },
    {
      "epoch": 2.1754062038404727,
      "grad_norm": 0.24775661528110504,
      "learning_rate": 5.500677423327997e-05,
      "loss": 0.1643,
      "step": 5891
    },
    {
      "epoch": 2.1757754800590843,
      "grad_norm": 0.2753461003303528,
      "learning_rate": 5.4982140657716475e-05,
      "loss": 0.1722,
      "step": 5892
    },
    {
      "epoch": 2.176144756277696,
      "grad_norm": 0.2582942545413971,
      "learning_rate": 5.495750708215298e-05,
      "loss": 0.1795,
      "step": 5893
    },
    {
      "epoch": 2.176514032496307,
      "grad_norm": 0.279904305934906,
      "learning_rate": 5.493287350658949e-05,
      "loss": 0.1601,
      "step": 5894
    },
    {
      "epoch": 2.1768833087149186,
      "grad_norm": 0.25517791509628296,
      "learning_rate": 5.4908239931026e-05,
      "loss": 0.1567,
      "step": 5895
    },
    {
      "epoch": 2.1772525849335302,
      "grad_norm": 0.2075134962797165,
      "learning_rate": 5.48836063554625e-05,
      "loss": 0.1467,
      "step": 5896
    },
    {
      "epoch": 2.177621861152142,
      "grad_norm": 0.3045804798603058,
      "learning_rate": 5.485897277989901e-05,
      "loss": 0.1694,
      "step": 5897
    },
    {
      "epoch": 2.1779911373707534,
      "grad_norm": 0.3269966244697571,
      "learning_rate": 5.4834339204335515e-05,
      "loss": 0.1689,
      "step": 5898
    },
    {
      "epoch": 2.178360413589365,
      "grad_norm": 0.2516946792602539,
      "learning_rate": 5.480970562877202e-05,
      "loss": 0.1531,
      "step": 5899
    },
    {
      "epoch": 2.178729689807976,
      "grad_norm": 0.2638711929321289,
      "learning_rate": 5.478507205320853e-05,
      "loss": 0.1558,
      "step": 5900
    },
    {
      "epoch": 2.178729689807976,
      "eval_loss": 0.2542784810066223,
      "eval_runtime": 5.8598,
      "eval_samples_per_second": 8.533,
      "eval_steps_per_second": 1.195,
      "step": 5900
    },
    {
      "epoch": 2.179098966026588,
      "grad_norm": 0.22851087152957916,
      "learning_rate": 5.476043847764504e-05,
      "loss": 0.1753,
      "step": 5901
    },
    {
      "epoch": 2.1794682422451994,
      "grad_norm": 0.31952738761901855,
      "learning_rate": 5.473580490208155e-05,
      "loss": 0.1792,
      "step": 5902
    },
    {
      "epoch": 2.179837518463811,
      "grad_norm": 0.26568403840065,
      "learning_rate": 5.4711171326518055e-05,
      "loss": 0.1605,
      "step": 5903
    },
    {
      "epoch": 2.1802067946824226,
      "grad_norm": 0.3061986565589905,
      "learning_rate": 5.4686537750954556e-05,
      "loss": 0.1841,
      "step": 5904
    },
    {
      "epoch": 2.180576070901034,
      "grad_norm": 0.2837606966495514,
      "learning_rate": 5.4661904175391064e-05,
      "loss": 0.1786,
      "step": 5905
    },
    {
      "epoch": 2.1809453471196454,
      "grad_norm": 0.27201905846595764,
      "learning_rate": 5.463727059982757e-05,
      "loss": 0.1671,
      "step": 5906
    },
    {
      "epoch": 2.181314623338257,
      "grad_norm": 0.2528875470161438,
      "learning_rate": 5.461263702426408e-05,
      "loss": 0.1815,
      "step": 5907
    },
    {
      "epoch": 2.1816838995568686,
      "grad_norm": 0.2863306999206543,
      "learning_rate": 5.458800344870059e-05,
      "loss": 0.1689,
      "step": 5908
    },
    {
      "epoch": 2.18205317577548,
      "grad_norm": 0.32362571358680725,
      "learning_rate": 5.4563369873137095e-05,
      "loss": 0.2014,
      "step": 5909
    },
    {
      "epoch": 2.182422451994092,
      "grad_norm": 0.2716105878353119,
      "learning_rate": 5.45387362975736e-05,
      "loss": 0.1761,
      "step": 5910
    },
    {
      "epoch": 2.182791728212703,
      "grad_norm": 0.23163507878780365,
      "learning_rate": 5.4514102722010104e-05,
      "loss": 0.1653,
      "step": 5911
    },
    {
      "epoch": 2.1831610044313146,
      "grad_norm": 0.26279935240745544,
      "learning_rate": 5.448946914644661e-05,
      "loss": 0.1685,
      "step": 5912
    },
    {
      "epoch": 2.183530280649926,
      "grad_norm": 0.2372507005929947,
      "learning_rate": 5.446483557088312e-05,
      "loss": 0.1571,
      "step": 5913
    },
    {
      "epoch": 2.1838995568685378,
      "grad_norm": 0.3401440382003784,
      "learning_rate": 5.444020199531963e-05,
      "loss": 0.188,
      "step": 5914
    },
    {
      "epoch": 2.1842688330871494,
      "grad_norm": 0.24209219217300415,
      "learning_rate": 5.4415568419756136e-05,
      "loss": 0.1577,
      "step": 5915
    },
    {
      "epoch": 2.1846381093057605,
      "grad_norm": 0.28312069177627563,
      "learning_rate": 5.4390934844192643e-05,
      "loss": 0.1601,
      "step": 5916
    },
    {
      "epoch": 2.185007385524372,
      "grad_norm": 0.24116961658000946,
      "learning_rate": 5.436630126862915e-05,
      "loss": 0.1768,
      "step": 5917
    },
    {
      "epoch": 2.1853766617429837,
      "grad_norm": 0.2630855441093445,
      "learning_rate": 5.434166769306566e-05,
      "loss": 0.1607,
      "step": 5918
    },
    {
      "epoch": 2.1857459379615953,
      "grad_norm": 0.31574612855911255,
      "learning_rate": 5.431703411750216e-05,
      "loss": 0.1635,
      "step": 5919
    },
    {
      "epoch": 2.186115214180207,
      "grad_norm": 0.31741586327552795,
      "learning_rate": 5.429240054193867e-05,
      "loss": 0.1883,
      "step": 5920
    },
    {
      "epoch": 2.1864844903988185,
      "grad_norm": 0.26064226031303406,
      "learning_rate": 5.426776696637517e-05,
      "loss": 0.1612,
      "step": 5921
    },
    {
      "epoch": 2.1868537666174297,
      "grad_norm": 0.23602989315986633,
      "learning_rate": 5.424313339081167e-05,
      "loss": 0.1526,
      "step": 5922
    },
    {
      "epoch": 2.1872230428360413,
      "grad_norm": 0.22026140987873077,
      "learning_rate": 5.421849981524818e-05,
      "loss": 0.1462,
      "step": 5923
    },
    {
      "epoch": 2.187592319054653,
      "grad_norm": 0.3276820778846741,
      "learning_rate": 5.4193866239684686e-05,
      "loss": 0.1931,
      "step": 5924
    },
    {
      "epoch": 2.1879615952732645,
      "grad_norm": 0.2706556022167206,
      "learning_rate": 5.4169232664121194e-05,
      "loss": 0.1658,
      "step": 5925
    },
    {
      "epoch": 2.188330871491876,
      "grad_norm": 0.27734729647636414,
      "learning_rate": 5.41445990885577e-05,
      "loss": 0.1862,
      "step": 5926
    },
    {
      "epoch": 2.1887001477104873,
      "grad_norm": 0.2646738886833191,
      "learning_rate": 5.411996551299421e-05,
      "loss": 0.1732,
      "step": 5927
    },
    {
      "epoch": 2.189069423929099,
      "grad_norm": 0.25028637051582336,
      "learning_rate": 5.409533193743072e-05,
      "loss": 0.1798,
      "step": 5928
    },
    {
      "epoch": 2.1894387001477105,
      "grad_norm": 0.2716618478298187,
      "learning_rate": 5.4070698361867225e-05,
      "loss": 0.2028,
      "step": 5929
    },
    {
      "epoch": 2.189807976366322,
      "grad_norm": 0.26107484102249146,
      "learning_rate": 5.4046064786303726e-05,
      "loss": 0.1683,
      "step": 5930
    },
    {
      "epoch": 2.1901772525849337,
      "grad_norm": 0.2878517806529999,
      "learning_rate": 5.4021431210740234e-05,
      "loss": 0.1751,
      "step": 5931
    },
    {
      "epoch": 2.1905465288035453,
      "grad_norm": 0.2957293391227722,
      "learning_rate": 5.399679763517674e-05,
      "loss": 0.1894,
      "step": 5932
    },
    {
      "epoch": 2.1909158050221564,
      "grad_norm": 0.2170945703983307,
      "learning_rate": 5.397216405961325e-05,
      "loss": 0.1485,
      "step": 5933
    },
    {
      "epoch": 2.191285081240768,
      "grad_norm": 0.2723937928676605,
      "learning_rate": 5.394753048404976e-05,
      "loss": 0.1754,
      "step": 5934
    },
    {
      "epoch": 2.1916543574593796,
      "grad_norm": 0.30582040548324585,
      "learning_rate": 5.3922896908486266e-05,
      "loss": 0.1846,
      "step": 5935
    },
    {
      "epoch": 2.1920236336779912,
      "grad_norm": 0.2360096573829651,
      "learning_rate": 5.3898263332922774e-05,
      "loss": 0.165,
      "step": 5936
    },
    {
      "epoch": 2.192392909896603,
      "grad_norm": 0.2892259955406189,
      "learning_rate": 5.387362975735928e-05,
      "loss": 0.2004,
      "step": 5937
    },
    {
      "epoch": 2.192762186115214,
      "grad_norm": 0.26588162779808044,
      "learning_rate": 5.384899618179578e-05,
      "loss": 0.1754,
      "step": 5938
    },
    {
      "epoch": 2.1931314623338256,
      "grad_norm": 0.2676050364971161,
      "learning_rate": 5.382436260623229e-05,
      "loss": 0.1792,
      "step": 5939
    },
    {
      "epoch": 2.193500738552437,
      "grad_norm": 0.30062136054039,
      "learning_rate": 5.37997290306688e-05,
      "loss": 0.1775,
      "step": 5940
    },
    {
      "epoch": 2.193870014771049,
      "grad_norm": 0.24869313836097717,
      "learning_rate": 5.3775095455105306e-05,
      "loss": 0.1585,
      "step": 5941
    },
    {
      "epoch": 2.1942392909896604,
      "grad_norm": 0.23664288222789764,
      "learning_rate": 5.3750461879541814e-05,
      "loss": 0.1431,
      "step": 5942
    },
    {
      "epoch": 2.194608567208272,
      "grad_norm": 0.28411614894866943,
      "learning_rate": 5.372582830397832e-05,
      "loss": 0.1885,
      "step": 5943
    },
    {
      "epoch": 2.194977843426883,
      "grad_norm": 0.23681482672691345,
      "learning_rate": 5.370119472841483e-05,
      "loss": 0.1702,
      "step": 5944
    },
    {
      "epoch": 2.195347119645495,
      "grad_norm": 0.26534298062324524,
      "learning_rate": 5.367656115285134e-05,
      "loss": 0.1538,
      "step": 5945
    },
    {
      "epoch": 2.1957163958641064,
      "grad_norm": 0.25928986072540283,
      "learning_rate": 5.365192757728784e-05,
      "loss": 0.1779,
      "step": 5946
    },
    {
      "epoch": 2.196085672082718,
      "grad_norm": 0.26320552825927734,
      "learning_rate": 5.3627294001724347e-05,
      "loss": 0.1569,
      "step": 5947
    },
    {
      "epoch": 2.1964549483013296,
      "grad_norm": 0.22869595885276794,
      "learning_rate": 5.3602660426160854e-05,
      "loss": 0.166,
      "step": 5948
    },
    {
      "epoch": 2.1968242245199407,
      "grad_norm": 0.3253004848957062,
      "learning_rate": 5.357802685059736e-05,
      "loss": 0.1927,
      "step": 5949
    },
    {
      "epoch": 2.1971935007385524,
      "grad_norm": 0.3554944396018982,
      "learning_rate": 5.355339327503387e-05,
      "loss": 0.1755,
      "step": 5950
    },
    {
      "epoch": 2.1971935007385524,
      "eval_loss": 0.25651848316192627,
      "eval_runtime": 5.8591,
      "eval_samples_per_second": 8.534,
      "eval_steps_per_second": 1.195,
      "step": 5950
    },
    {
      "epoch": 2.197562776957164,
      "grad_norm": 0.23495326936244965,
      "learning_rate": 5.352875969947038e-05,
      "loss": 0.158,
      "step": 5951
    },
    {
      "epoch": 2.1979320531757756,
      "grad_norm": 0.2928256392478943,
      "learning_rate": 5.3504126123906886e-05,
      "loss": 0.1874,
      "step": 5952
    },
    {
      "epoch": 2.198301329394387,
      "grad_norm": 0.264809250831604,
      "learning_rate": 5.3479492548343394e-05,
      "loss": 0.1728,
      "step": 5953
    },
    {
      "epoch": 2.1986706056129983,
      "grad_norm": 0.2503385543823242,
      "learning_rate": 5.3454858972779895e-05,
      "loss": 0.172,
      "step": 5954
    },
    {
      "epoch": 2.19903988183161,
      "grad_norm": 0.2616809010505676,
      "learning_rate": 5.34302253972164e-05,
      "loss": 0.1511,
      "step": 5955
    },
    {
      "epoch": 2.1994091580502215,
      "grad_norm": 0.23035597801208496,
      "learning_rate": 5.340559182165291e-05,
      "loss": 0.1684,
      "step": 5956
    },
    {
      "epoch": 2.199778434268833,
      "grad_norm": 0.2465926557779312,
      "learning_rate": 5.338095824608942e-05,
      "loss": 0.1698,
      "step": 5957
    },
    {
      "epoch": 2.2001477104874447,
      "grad_norm": 0.27567368745803833,
      "learning_rate": 5.3356324670525926e-05,
      "loss": 0.1859,
      "step": 5958
    },
    {
      "epoch": 2.2005169867060563,
      "grad_norm": 0.22160843014717102,
      "learning_rate": 5.3331691094962434e-05,
      "loss": 0.1516,
      "step": 5959
    },
    {
      "epoch": 2.2008862629246675,
      "grad_norm": 0.26636406779289246,
      "learning_rate": 5.330705751939894e-05,
      "loss": 0.1644,
      "step": 5960
    },
    {
      "epoch": 2.201255539143279,
      "grad_norm": 0.29620641469955444,
      "learning_rate": 5.328242394383545e-05,
      "loss": 0.1603,
      "step": 5961
    },
    {
      "epoch": 2.2016248153618907,
      "grad_norm": 0.2895677089691162,
      "learning_rate": 5.325779036827195e-05,
      "loss": 0.1809,
      "step": 5962
    },
    {
      "epoch": 2.2019940915805023,
      "grad_norm": 0.28737372159957886,
      "learning_rate": 5.323315679270846e-05,
      "loss": 0.1832,
      "step": 5963
    },
    {
      "epoch": 2.202363367799114,
      "grad_norm": 0.28137853741645813,
      "learning_rate": 5.320852321714497e-05,
      "loss": 0.1738,
      "step": 5964
    },
    {
      "epoch": 2.202732644017725,
      "grad_norm": 0.38717013597488403,
      "learning_rate": 5.3183889641581475e-05,
      "loss": 0.1862,
      "step": 5965
    },
    {
      "epoch": 2.2031019202363367,
      "grad_norm": 0.27989840507507324,
      "learning_rate": 5.315925606601798e-05,
      "loss": 0.1471,
      "step": 5966
    },
    {
      "epoch": 2.2034711964549483,
      "grad_norm": 0.23981954157352448,
      "learning_rate": 5.313462249045449e-05,
      "loss": 0.1699,
      "step": 5967
    },
    {
      "epoch": 2.20384047267356,
      "grad_norm": 0.28785717487335205,
      "learning_rate": 5.3109988914891e-05,
      "loss": 0.1627,
      "step": 5968
    },
    {
      "epoch": 2.2042097488921715,
      "grad_norm": 0.3268805742263794,
      "learning_rate": 5.3085355339327506e-05,
      "loss": 0.193,
      "step": 5969
    },
    {
      "epoch": 2.2045790251107826,
      "grad_norm": 0.25777700543403625,
      "learning_rate": 5.306072176376401e-05,
      "loss": 0.1678,
      "step": 5970
    },
    {
      "epoch": 2.2049483013293942,
      "grad_norm": 0.26376500725746155,
      "learning_rate": 5.3036088188200515e-05,
      "loss": 0.1632,
      "step": 5971
    },
    {
      "epoch": 2.205317577548006,
      "grad_norm": 0.25371983647346497,
      "learning_rate": 5.301145461263702e-05,
      "loss": 0.1668,
      "step": 5972
    },
    {
      "epoch": 2.2056868537666174,
      "grad_norm": 0.22691114246845245,
      "learning_rate": 5.298682103707353e-05,
      "loss": 0.145,
      "step": 5973
    },
    {
      "epoch": 2.206056129985229,
      "grad_norm": 0.24083566665649414,
      "learning_rate": 5.296218746151004e-05,
      "loss": 0.1603,
      "step": 5974
    },
    {
      "epoch": 2.2064254062038406,
      "grad_norm": 0.228380486369133,
      "learning_rate": 5.2937553885946546e-05,
      "loss": 0.1398,
      "step": 5975
    },
    {
      "epoch": 2.206794682422452,
      "grad_norm": 0.30277687311172485,
      "learning_rate": 5.2912920310383054e-05,
      "loss": 0.1856,
      "step": 5976
    },
    {
      "epoch": 2.2071639586410634,
      "grad_norm": 0.303791880607605,
      "learning_rate": 5.288828673481956e-05,
      "loss": 0.163,
      "step": 5977
    },
    {
      "epoch": 2.207533234859675,
      "grad_norm": 0.266146183013916,
      "learning_rate": 5.286365315925606e-05,
      "loss": 0.1871,
      "step": 5978
    },
    {
      "epoch": 2.2079025110782866,
      "grad_norm": 0.290078341960907,
      "learning_rate": 5.283901958369257e-05,
      "loss": 0.1705,
      "step": 5979
    },
    {
      "epoch": 2.208271787296898,
      "grad_norm": 0.2652760446071625,
      "learning_rate": 5.281438600812908e-05,
      "loss": 0.156,
      "step": 5980
    },
    {
      "epoch": 2.2086410635155094,
      "grad_norm": 0.286885142326355,
      "learning_rate": 5.278975243256559e-05,
      "loss": 0.1833,
      "step": 5981
    },
    {
      "epoch": 2.209010339734121,
      "grad_norm": 0.26764151453971863,
      "learning_rate": 5.2765118857002095e-05,
      "loss": 0.1834,
      "step": 5982
    },
    {
      "epoch": 2.2093796159527326,
      "grad_norm": 0.24872265756130219,
      "learning_rate": 5.27404852814386e-05,
      "loss": 0.1637,
      "step": 5983
    },
    {
      "epoch": 2.209748892171344,
      "grad_norm": 0.2747139632701874,
      "learning_rate": 5.271585170587511e-05,
      "loss": 0.1703,
      "step": 5984
    },
    {
      "epoch": 2.210118168389956,
      "grad_norm": 0.2682111859321594,
      "learning_rate": 5.269121813031161e-05,
      "loss": 0.1697,
      "step": 5985
    },
    {
      "epoch": 2.2104874446085674,
      "grad_norm": 0.3948352038860321,
      "learning_rate": 5.266658455474812e-05,
      "loss": 0.1763,
      "step": 5986
    },
    {
      "epoch": 2.2108567208271785,
      "grad_norm": 0.27390846610069275,
      "learning_rate": 5.264195097918463e-05,
      "loss": 0.1678,
      "step": 5987
    },
    {
      "epoch": 2.21122599704579,
      "grad_norm": 0.24934548139572144,
      "learning_rate": 5.2617317403621135e-05,
      "loss": 0.1761,
      "step": 5988
    },
    {
      "epoch": 2.2115952732644018,
      "grad_norm": 0.2849212884902954,
      "learning_rate": 5.259268382805764e-05,
      "loss": 0.1843,
      "step": 5989
    },
    {
      "epoch": 2.2119645494830134,
      "grad_norm": 0.25760313868522644,
      "learning_rate": 5.256805025249415e-05,
      "loss": 0.1454,
      "step": 5990
    },
    {
      "epoch": 2.212333825701625,
      "grad_norm": 0.253659725189209,
      "learning_rate": 5.254341667693066e-05,
      "loss": 0.1763,
      "step": 5991
    },
    {
      "epoch": 2.212703101920236,
      "grad_norm": 0.2626623511314392,
      "learning_rate": 5.2518783101367167e-05,
      "loss": 0.162,
      "step": 5992
    },
    {
      "epoch": 2.2130723781388477,
      "grad_norm": 0.2705555260181427,
      "learning_rate": 5.249414952580367e-05,
      "loss": 0.1734,
      "step": 5993
    },
    {
      "epoch": 2.2134416543574593,
      "grad_norm": 0.2433822602033615,
      "learning_rate": 5.2469515950240175e-05,
      "loss": 0.1614,
      "step": 5994
    },
    {
      "epoch": 2.213810930576071,
      "grad_norm": 0.27496635913848877,
      "learning_rate": 5.244488237467668e-05,
      "loss": 0.1639,
      "step": 5995
    },
    {
      "epoch": 2.2141802067946825,
      "grad_norm": 0.29041311144828796,
      "learning_rate": 5.242024879911319e-05,
      "loss": 0.1496,
      "step": 5996
    },
    {
      "epoch": 2.214549483013294,
      "grad_norm": 0.2603946030139923,
      "learning_rate": 5.23956152235497e-05,
      "loss": 0.1553,
      "step": 5997
    },
    {
      "epoch": 2.2149187592319053,
      "grad_norm": 0.30235958099365234,
      "learning_rate": 5.237098164798621e-05,
      "loss": 0.1983,
      "step": 5998
    },
    {
      "epoch": 2.215288035450517,
      "grad_norm": 0.23368962109088898,
      "learning_rate": 5.2346348072422715e-05,
      "loss": 0.1538,
      "step": 5999
    },
    {
      "epoch": 2.2156573116691285,
      "grad_norm": 0.3310740888118744,
      "learning_rate": 5.232171449685922e-05,
      "loss": 0.1808,
      "step": 6000
    },
    {
      "epoch": 2.2156573116691285,
      "eval_loss": 0.253889262676239,
      "eval_runtime": 5.8684,
      "eval_samples_per_second": 8.52,
      "eval_steps_per_second": 1.193,
      "step": 6000
    },
    {
      "epoch": 2.21602658788774,
      "grad_norm": 0.24267062544822693,
      "learning_rate": 5.2297080921295724e-05,
      "loss": 0.1824,
      "step": 6001
    },
    {
      "epoch": 2.2163958641063517,
      "grad_norm": 0.29777005314826965,
      "learning_rate": 5.227244734573223e-05,
      "loss": 0.1793,
      "step": 6002
    },
    {
      "epoch": 2.216765140324963,
      "grad_norm": 0.2265225499868393,
      "learning_rate": 5.224781377016874e-05,
      "loss": 0.1516,
      "step": 6003
    },
    {
      "epoch": 2.2171344165435745,
      "grad_norm": 0.2195178121328354,
      "learning_rate": 5.222318019460525e-05,
      "loss": 0.1521,
      "step": 6004
    },
    {
      "epoch": 2.217503692762186,
      "grad_norm": 0.2303364872932434,
      "learning_rate": 5.2198546619041755e-05,
      "loss": 0.1489,
      "step": 6005
    },
    {
      "epoch": 2.2178729689807977,
      "grad_norm": 0.29687920212745667,
      "learning_rate": 5.217391304347826e-05,
      "loss": 0.1876,
      "step": 6006
    },
    {
      "epoch": 2.2182422451994093,
      "grad_norm": 0.22180317342281342,
      "learning_rate": 5.214927946791477e-05,
      "loss": 0.1512,
      "step": 6007
    },
    {
      "epoch": 2.218611521418021,
      "grad_norm": 0.2409660518169403,
      "learning_rate": 5.212464589235128e-05,
      "loss": 0.1672,
      "step": 6008
    },
    {
      "epoch": 2.218980797636632,
      "grad_norm": 0.2430661916732788,
      "learning_rate": 5.210001231678778e-05,
      "loss": 0.1739,
      "step": 6009
    },
    {
      "epoch": 2.2193500738552436,
      "grad_norm": 0.30108213424682617,
      "learning_rate": 5.207537874122429e-05,
      "loss": 0.1691,
      "step": 6010
    },
    {
      "epoch": 2.2197193500738552,
      "grad_norm": 0.24237754940986633,
      "learning_rate": 5.2050745165660796e-05,
      "loss": 0.155,
      "step": 6011
    },
    {
      "epoch": 2.220088626292467,
      "grad_norm": 0.2368919849395752,
      "learning_rate": 5.2026111590097303e-05,
      "loss": 0.16,
      "step": 6012
    },
    {
      "epoch": 2.2204579025110784,
      "grad_norm": 0.24591723084449768,
      "learning_rate": 5.200147801453381e-05,
      "loss": 0.1673,
      "step": 6013
    },
    {
      "epoch": 2.2208271787296896,
      "grad_norm": 0.26719069480895996,
      "learning_rate": 5.197684443897032e-05,
      "loss": 0.1538,
      "step": 6014
    },
    {
      "epoch": 2.221196454948301,
      "grad_norm": 0.23640255630016327,
      "learning_rate": 5.195221086340683e-05,
      "loss": 0.173,
      "step": 6015
    },
    {
      "epoch": 2.221565731166913,
      "grad_norm": 0.2909172475337982,
      "learning_rate": 5.1927577287843335e-05,
      "loss": 0.1675,
      "step": 6016
    },
    {
      "epoch": 2.2219350073855244,
      "grad_norm": 0.27148836851119995,
      "learning_rate": 5.1902943712279836e-05,
      "loss": 0.1623,
      "step": 6017
    },
    {
      "epoch": 2.222304283604136,
      "grad_norm": 0.24277964234352112,
      "learning_rate": 5.1878310136716344e-05,
      "loss": 0.1567,
      "step": 6018
    },
    {
      "epoch": 2.2226735598227476,
      "grad_norm": 0.26548653841018677,
      "learning_rate": 5.185367656115285e-05,
      "loss": 0.1698,
      "step": 6019
    },
    {
      "epoch": 2.2230428360413588,
      "grad_norm": 0.3038986623287201,
      "learning_rate": 5.182904298558936e-05,
      "loss": 0.1572,
      "step": 6020
    },
    {
      "epoch": 2.2234121122599704,
      "grad_norm": 0.2512723505496979,
      "learning_rate": 5.180440941002587e-05,
      "loss": 0.1566,
      "step": 6021
    },
    {
      "epoch": 2.223781388478582,
      "grad_norm": 0.2630036473274231,
      "learning_rate": 5.1779775834462375e-05,
      "loss": 0.1776,
      "step": 6022
    },
    {
      "epoch": 2.2241506646971936,
      "grad_norm": 0.244293212890625,
      "learning_rate": 5.175514225889888e-05,
      "loss": 0.1666,
      "step": 6023
    },
    {
      "epoch": 2.224519940915805,
      "grad_norm": 0.25000420212745667,
      "learning_rate": 5.173050868333539e-05,
      "loss": 0.1597,
      "step": 6024
    },
    {
      "epoch": 2.2248892171344163,
      "grad_norm": 0.2320886105298996,
      "learning_rate": 5.170587510777189e-05,
      "loss": 0.1598,
      "step": 6025
    },
    {
      "epoch": 2.225258493353028,
      "grad_norm": 0.2409050315618515,
      "learning_rate": 5.16812415322084e-05,
      "loss": 0.1599,
      "step": 6026
    },
    {
      "epoch": 2.2256277695716395,
      "grad_norm": 0.24685463309288025,
      "learning_rate": 5.165660795664491e-05,
      "loss": 0.1644,
      "step": 6027
    },
    {
      "epoch": 2.225997045790251,
      "grad_norm": 0.28407877683639526,
      "learning_rate": 5.1631974381081416e-05,
      "loss": 0.1783,
      "step": 6028
    },
    {
      "epoch": 2.2263663220088628,
      "grad_norm": 0.23314602673053741,
      "learning_rate": 5.1607340805517924e-05,
      "loss": 0.1688,
      "step": 6029
    },
    {
      "epoch": 2.2267355982274744,
      "grad_norm": 0.26370736956596375,
      "learning_rate": 5.158270722995443e-05,
      "loss": 0.1768,
      "step": 6030
    },
    {
      "epoch": 2.2271048744460855,
      "grad_norm": 0.33216115832328796,
      "learning_rate": 5.155807365439094e-05,
      "loss": 0.1904,
      "step": 6031
    },
    {
      "epoch": 2.227474150664697,
      "grad_norm": 0.24514667689800262,
      "learning_rate": 5.153344007882745e-05,
      "loss": 0.1595,
      "step": 6032
    },
    {
      "epoch": 2.2278434268833087,
      "grad_norm": 0.2976848781108856,
      "learning_rate": 5.150880650326395e-05,
      "loss": 0.178,
      "step": 6033
    },
    {
      "epoch": 2.2282127031019203,
      "grad_norm": 0.2994171380996704,
      "learning_rate": 5.1484172927700456e-05,
      "loss": 0.1824,
      "step": 6034
    },
    {
      "epoch": 2.228581979320532,
      "grad_norm": 0.2807120084762573,
      "learning_rate": 5.1459539352136964e-05,
      "loss": 0.1507,
      "step": 6035
    },
    {
      "epoch": 2.228951255539143,
      "grad_norm": 0.25743022561073303,
      "learning_rate": 5.143490577657347e-05,
      "loss": 0.1567,
      "step": 6036
    },
    {
      "epoch": 2.2293205317577547,
      "grad_norm": 0.31996509432792664,
      "learning_rate": 5.141027220100998e-05,
      "loss": 0.2078,
      "step": 6037
    },
    {
      "epoch": 2.2296898079763663,
      "grad_norm": 0.2824748158454895,
      "learning_rate": 5.138563862544649e-05,
      "loss": 0.154,
      "step": 6038
    },
    {
      "epoch": 2.230059084194978,
      "grad_norm": 0.3212464153766632,
      "learning_rate": 5.1361005049882995e-05,
      "loss": 0.1818,
      "step": 6039
    },
    {
      "epoch": 2.2304283604135895,
      "grad_norm": 0.33793872594833374,
      "learning_rate": 5.13363714743195e-05,
      "loss": 0.1721,
      "step": 6040
    },
    {
      "epoch": 2.230797636632201,
      "grad_norm": 0.3192881941795349,
      "learning_rate": 5.1311737898756004e-05,
      "loss": 0.1755,
      "step": 6041
    },
    {
      "epoch": 2.2311669128508123,
      "grad_norm": 0.3159840404987335,
      "learning_rate": 5.128710432319251e-05,
      "loss": 0.1873,
      "step": 6042
    },
    {
      "epoch": 2.231536189069424,
      "grad_norm": 0.29445603489875793,
      "learning_rate": 5.126247074762902e-05,
      "loss": 0.1717,
      "step": 6043
    },
    {
      "epoch": 2.2319054652880355,
      "grad_norm": 0.3303259313106537,
      "learning_rate": 5.123783717206553e-05,
      "loss": 0.1725,
      "step": 6044
    },
    {
      "epoch": 2.232274741506647,
      "grad_norm": 0.21008101105690002,
      "learning_rate": 5.1213203596502036e-05,
      "loss": 0.1375,
      "step": 6045
    },
    {
      "epoch": 2.2326440177252587,
      "grad_norm": 0.403527170419693,
      "learning_rate": 5.1188570020938544e-05,
      "loss": 0.206,
      "step": 6046
    },
    {
      "epoch": 2.23301329394387,
      "grad_norm": 0.273002028465271,
      "learning_rate": 5.116393644537505e-05,
      "loss": 0.1686,
      "step": 6047
    },
    {
      "epoch": 2.2333825701624814,
      "grad_norm": 0.32015368342399597,
      "learning_rate": 5.113930286981156e-05,
      "loss": 0.1722,
      "step": 6048
    },
    {
      "epoch": 2.233751846381093,
      "grad_norm": 0.2369040995836258,
      "learning_rate": 5.111466929424806e-05,
      "loss": 0.1585,
      "step": 6049
    },
    {
      "epoch": 2.2341211225997046,
      "grad_norm": 0.2641754150390625,
      "learning_rate": 5.109003571868457e-05,
      "loss": 0.1575,
      "step": 6050
    },
    {
      "epoch": 2.2341211225997046,
      "eval_loss": 0.2539427876472473,
      "eval_runtime": 5.8532,
      "eval_samples_per_second": 8.542,
      "eval_steps_per_second": 1.196,
      "step": 6050
    },
    {
      "epoch": 2.2344903988183162,
      "grad_norm": 0.26975658535957336,
      "learning_rate": 5.1065402143121076e-05,
      "loss": 0.1604,
      "step": 6051
    },
    {
      "epoch": 2.234859675036928,
      "grad_norm": 0.27304011583328247,
      "learning_rate": 5.1040768567557584e-05,
      "loss": 0.1463,
      "step": 6052
    },
    {
      "epoch": 2.235228951255539,
      "grad_norm": 0.2763857841491699,
      "learning_rate": 5.101613499199409e-05,
      "loss": 0.1648,
      "step": 6053
    },
    {
      "epoch": 2.2355982274741506,
      "grad_norm": 0.23335494101047516,
      "learning_rate": 5.09915014164306e-05,
      "loss": 0.1489,
      "step": 6054
    },
    {
      "epoch": 2.235967503692762,
      "grad_norm": 0.22589267790317535,
      "learning_rate": 5.096686784086711e-05,
      "loss": 0.1691,
      "step": 6055
    },
    {
      "epoch": 2.236336779911374,
      "grad_norm": 0.27718567848205566,
      "learning_rate": 5.0942234265303616e-05,
      "loss": 0.1582,
      "step": 6056
    },
    {
      "epoch": 2.2367060561299854,
      "grad_norm": 0.2392417937517166,
      "learning_rate": 5.091760068974012e-05,
      "loss": 0.164,
      "step": 6057
    },
    {
      "epoch": 2.2370753323485966,
      "grad_norm": 0.22366121411323547,
      "learning_rate": 5.0892967114176625e-05,
      "loss": 0.18,
      "step": 6058
    },
    {
      "epoch": 2.237444608567208,
      "grad_norm": 0.24311357736587524,
      "learning_rate": 5.086833353861313e-05,
      "loss": 0.1526,
      "step": 6059
    },
    {
      "epoch": 2.2378138847858198,
      "grad_norm": 0.2334897220134735,
      "learning_rate": 5.084369996304964e-05,
      "loss": 0.1628,
      "step": 6060
    },
    {
      "epoch": 2.2381831610044314,
      "grad_norm": 0.28294169902801514,
      "learning_rate": 5.081906638748615e-05,
      "loss": 0.1767,
      "step": 6061
    },
    {
      "epoch": 2.238552437223043,
      "grad_norm": 0.24123069643974304,
      "learning_rate": 5.0794432811922656e-05,
      "loss": 0.1596,
      "step": 6062
    },
    {
      "epoch": 2.2389217134416546,
      "grad_norm": 0.26982197165489197,
      "learning_rate": 5.0769799236359164e-05,
      "loss": 0.1668,
      "step": 6063
    },
    {
      "epoch": 2.2392909896602657,
      "grad_norm": 0.3898751735687256,
      "learning_rate": 5.0745165660795665e-05,
      "loss": 0.1759,
      "step": 6064
    },
    {
      "epoch": 2.2396602658788773,
      "grad_norm": 0.2820669114589691,
      "learning_rate": 5.072053208523217e-05,
      "loss": 0.1955,
      "step": 6065
    },
    {
      "epoch": 2.240029542097489,
      "grad_norm": 0.259836882352829,
      "learning_rate": 5.069589850966868e-05,
      "loss": 0.1891,
      "step": 6066
    },
    {
      "epoch": 2.2403988183161005,
      "grad_norm": 0.25741878151893616,
      "learning_rate": 5.067126493410519e-05,
      "loss": 0.1621,
      "step": 6067
    },
    {
      "epoch": 2.240768094534712,
      "grad_norm": 0.24973520636558533,
      "learning_rate": 5.0646631358541696e-05,
      "loss": 0.1695,
      "step": 6068
    },
    {
      "epoch": 2.2411373707533233,
      "grad_norm": 0.3279678523540497,
      "learning_rate": 5.0621997782978204e-05,
      "loss": 0.1746,
      "step": 6069
    },
    {
      "epoch": 2.241506646971935,
      "grad_norm": 0.22707240283489227,
      "learning_rate": 5.059736420741471e-05,
      "loss": 0.1552,
      "step": 6070
    },
    {
      "epoch": 2.2418759231905465,
      "grad_norm": 0.2978220283985138,
      "learning_rate": 5.057273063185122e-05,
      "loss": 0.1682,
      "step": 6071
    },
    {
      "epoch": 2.242245199409158,
      "grad_norm": 0.26274436712265015,
      "learning_rate": 5.054809705628772e-05,
      "loss": 0.1682,
      "step": 6072
    },
    {
      "epoch": 2.2426144756277697,
      "grad_norm": 0.22878243029117584,
      "learning_rate": 5.052346348072423e-05,
      "loss": 0.1488,
      "step": 6073
    },
    {
      "epoch": 2.2429837518463813,
      "grad_norm": 0.3393493592739105,
      "learning_rate": 5.049882990516074e-05,
      "loss": 0.1931,
      "step": 6074
    },
    {
      "epoch": 2.2433530280649925,
      "grad_norm": 0.21743762493133545,
      "learning_rate": 5.0474196329597245e-05,
      "loss": 0.159,
      "step": 6075
    },
    {
      "epoch": 2.243722304283604,
      "grad_norm": 0.28104040026664734,
      "learning_rate": 5.044956275403375e-05,
      "loss": 0.167,
      "step": 6076
    },
    {
      "epoch": 2.2440915805022157,
      "grad_norm": 0.2588372528553009,
      "learning_rate": 5.042492917847026e-05,
      "loss": 0.1864,
      "step": 6077
    },
    {
      "epoch": 2.2444608567208273,
      "grad_norm": 0.29319703578948975,
      "learning_rate": 5.040029560290677e-05,
      "loss": 0.1803,
      "step": 6078
    },
    {
      "epoch": 2.244830132939439,
      "grad_norm": 0.29294025897979736,
      "learning_rate": 5.0375662027343276e-05,
      "loss": 0.1739,
      "step": 6079
    },
    {
      "epoch": 2.24519940915805,
      "grad_norm": 0.30922091007232666,
      "learning_rate": 5.035102845177978e-05,
      "loss": 0.1935,
      "step": 6080
    },
    {
      "epoch": 2.2455686853766617,
      "grad_norm": 0.2539861500263214,
      "learning_rate": 5.0326394876216285e-05,
      "loss": 0.1616,
      "step": 6081
    },
    {
      "epoch": 2.2459379615952733,
      "grad_norm": 0.22313763201236725,
      "learning_rate": 5.030176130065279e-05,
      "loss": 0.156,
      "step": 6082
    },
    {
      "epoch": 2.246307237813885,
      "grad_norm": 0.2427612692117691,
      "learning_rate": 5.02771277250893e-05,
      "loss": 0.1639,
      "step": 6083
    },
    {
      "epoch": 2.2466765140324965,
      "grad_norm": 0.2386532723903656,
      "learning_rate": 5.025249414952581e-05,
      "loss": 0.1675,
      "step": 6084
    },
    {
      "epoch": 2.2470457902511076,
      "grad_norm": 0.2651612162590027,
      "learning_rate": 5.0227860573962317e-05,
      "loss": 0.1824,
      "step": 6085
    },
    {
      "epoch": 2.2474150664697192,
      "grad_norm": 0.24895986914634705,
      "learning_rate": 5.0203226998398824e-05,
      "loss": 0.1803,
      "step": 6086
    },
    {
      "epoch": 2.247784342688331,
      "grad_norm": 0.2946280837059021,
      "learning_rate": 5.017859342283533e-05,
      "loss": 0.1774,
      "step": 6087
    },
    {
      "epoch": 2.2481536189069424,
      "grad_norm": 0.29397857189178467,
      "learning_rate": 5.015395984727183e-05,
      "loss": 0.1732,
      "step": 6088
    },
    {
      "epoch": 2.248522895125554,
      "grad_norm": 0.2765266001224518,
      "learning_rate": 5.012932627170834e-05,
      "loss": 0.1527,
      "step": 6089
    },
    {
      "epoch": 2.2488921713441656,
      "grad_norm": 0.2427031695842743,
      "learning_rate": 5.010469269614485e-05,
      "loss": 0.1592,
      "step": 6090
    },
    {
      "epoch": 2.249261447562777,
      "grad_norm": 0.30422443151474,
      "learning_rate": 5.008005912058136e-05,
      "loss": 0.1687,
      "step": 6091
    },
    {
      "epoch": 2.2496307237813884,
      "grad_norm": 0.263476699590683,
      "learning_rate": 5.0055425545017865e-05,
      "loss": 0.1861,
      "step": 6092
    },
    {
      "epoch": 2.25,
      "grad_norm": 0.26989686489105225,
      "learning_rate": 5.003079196945437e-05,
      "loss": 0.1977,
      "step": 6093
    },
    {
      "epoch": 2.2503692762186116,
      "grad_norm": 0.23634803295135498,
      "learning_rate": 5.000615839389088e-05,
      "loss": 0.1761,
      "step": 6094
    },
    {
      "epoch": 2.250738552437223,
      "grad_norm": 0.24952930212020874,
      "learning_rate": 4.998152481832738e-05,
      "loss": 0.1675,
      "step": 6095
    },
    {
      "epoch": 2.251107828655835,
      "grad_norm": 0.30477458238601685,
      "learning_rate": 4.995689124276389e-05,
      "loss": 0.1797,
      "step": 6096
    },
    {
      "epoch": 2.251477104874446,
      "grad_norm": 0.2522794008255005,
      "learning_rate": 4.993225766720039e-05,
      "loss": 0.1598,
      "step": 6097
    },
    {
      "epoch": 2.2518463810930576,
      "grad_norm": 0.211015984416008,
      "learning_rate": 4.99076240916369e-05,
      "loss": 0.1571,
      "step": 6098
    },
    {
      "epoch": 2.252215657311669,
      "grad_norm": 0.235479936003685,
      "learning_rate": 4.9882990516073406e-05,
      "loss": 0.1747,
      "step": 6099
    },
    {
      "epoch": 2.2525849335302808,
      "grad_norm": 0.32731950283050537,
      "learning_rate": 4.9858356940509914e-05,
      "loss": 0.1731,
      "step": 6100
    },
    {
      "epoch": 2.2525849335302808,
      "eval_loss": 0.2540842294692993,
      "eval_runtime": 5.8529,
      "eval_samples_per_second": 8.543,
      "eval_steps_per_second": 1.196,
      "step": 6100
    },
    {
      "epoch": 2.252954209748892,
      "grad_norm": 0.2917706370353699,
      "learning_rate": 4.983372336494642e-05,
      "loss": 0.1781,
      "step": 6101
    },
    {
      "epoch": 2.2533234859675035,
      "grad_norm": 0.22985264658927917,
      "learning_rate": 4.980908978938293e-05,
      "loss": 0.1669,
      "step": 6102
    },
    {
      "epoch": 2.253692762186115,
      "grad_norm": 0.25897547602653503,
      "learning_rate": 4.978445621381944e-05,
      "loss": 0.154,
      "step": 6103
    },
    {
      "epoch": 2.2540620384047267,
      "grad_norm": 0.26246926188468933,
      "learning_rate": 4.9759822638255946e-05,
      "loss": 0.162,
      "step": 6104
    },
    {
      "epoch": 2.2544313146233383,
      "grad_norm": 0.3654504120349884,
      "learning_rate": 4.973518906269245e-05,
      "loss": 0.189,
      "step": 6105
    },
    {
      "epoch": 2.25480059084195,
      "grad_norm": 0.26094144582748413,
      "learning_rate": 4.9710555487128955e-05,
      "loss": 0.1553,
      "step": 6106
    },
    {
      "epoch": 2.255169867060561,
      "grad_norm": 0.2783089876174927,
      "learning_rate": 4.968592191156546e-05,
      "loss": 0.1651,
      "step": 6107
    },
    {
      "epoch": 2.2555391432791727,
      "grad_norm": 0.27150285243988037,
      "learning_rate": 4.966128833600197e-05,
      "loss": 0.1931,
      "step": 6108
    },
    {
      "epoch": 2.2559084194977843,
      "grad_norm": 0.2587626874446869,
      "learning_rate": 4.963665476043848e-05,
      "loss": 0.1698,
      "step": 6109
    },
    {
      "epoch": 2.256277695716396,
      "grad_norm": 0.24936683475971222,
      "learning_rate": 4.9612021184874986e-05,
      "loss": 0.1808,
      "step": 6110
    },
    {
      "epoch": 2.2566469719350075,
      "grad_norm": 0.26469147205352783,
      "learning_rate": 4.9587387609311494e-05,
      "loss": 0.167,
      "step": 6111
    },
    {
      "epoch": 2.2570162481536187,
      "grad_norm": 0.27131637930870056,
      "learning_rate": 4.9562754033748e-05,
      "loss": 0.1636,
      "step": 6112
    },
    {
      "epoch": 2.2573855243722303,
      "grad_norm": 0.2650572955608368,
      "learning_rate": 4.95381204581845e-05,
      "loss": 0.151,
      "step": 6113
    },
    {
      "epoch": 2.257754800590842,
      "grad_norm": 0.2983565032482147,
      "learning_rate": 4.951348688262101e-05,
      "loss": 0.1865,
      "step": 6114
    },
    {
      "epoch": 2.2581240768094535,
      "grad_norm": 0.3544551134109497,
      "learning_rate": 4.948885330705752e-05,
      "loss": 0.1936,
      "step": 6115
    },
    {
      "epoch": 2.258493353028065,
      "grad_norm": 0.24496768414974213,
      "learning_rate": 4.9464219731494026e-05,
      "loss": 0.1865,
      "step": 6116
    },
    {
      "epoch": 2.2588626292466767,
      "grad_norm": 0.33322036266326904,
      "learning_rate": 4.9439586155930534e-05,
      "loss": 0.2055,
      "step": 6117
    },
    {
      "epoch": 2.259231905465288,
      "grad_norm": 0.2713990807533264,
      "learning_rate": 4.941495258036704e-05,
      "loss": 0.1818,
      "step": 6118
    },
    {
      "epoch": 2.2596011816838995,
      "grad_norm": 0.24263006448745728,
      "learning_rate": 4.939031900480355e-05,
      "loss": 0.18,
      "step": 6119
    },
    {
      "epoch": 2.259970457902511,
      "grad_norm": 0.25105422735214233,
      "learning_rate": 4.936568542924006e-05,
      "loss": 0.1578,
      "step": 6120
    },
    {
      "epoch": 2.2603397341211227,
      "grad_norm": 0.24577294290065765,
      "learning_rate": 4.934105185367656e-05,
      "loss": 0.1683,
      "step": 6121
    },
    {
      "epoch": 2.2607090103397343,
      "grad_norm": 0.24745669960975647,
      "learning_rate": 4.931641827811307e-05,
      "loss": 0.1599,
      "step": 6122
    },
    {
      "epoch": 2.2610782865583454,
      "grad_norm": 0.3026842772960663,
      "learning_rate": 4.9291784702549575e-05,
      "loss": 0.1932,
      "step": 6123
    },
    {
      "epoch": 2.261447562776957,
      "grad_norm": 0.2283596694469452,
      "learning_rate": 4.926715112698608e-05,
      "loss": 0.1585,
      "step": 6124
    },
    {
      "epoch": 2.2618168389955686,
      "grad_norm": 0.22106395661830902,
      "learning_rate": 4.924251755142259e-05,
      "loss": 0.1582,
      "step": 6125
    },
    {
      "epoch": 2.2621861152141802,
      "grad_norm": 0.29688745737075806,
      "learning_rate": 4.92178839758591e-05,
      "loss": 0.1856,
      "step": 6126
    },
    {
      "epoch": 2.262555391432792,
      "grad_norm": 0.2598349153995514,
      "learning_rate": 4.9193250400295606e-05,
      "loss": 0.1875,
      "step": 6127
    },
    {
      "epoch": 2.2629246676514034,
      "grad_norm": 0.2807743549346924,
      "learning_rate": 4.9168616824732114e-05,
      "loss": 0.1562,
      "step": 6128
    },
    {
      "epoch": 2.2632939438700146,
      "grad_norm": 0.23551684617996216,
      "learning_rate": 4.9143983249168615e-05,
      "loss": 0.1652,
      "step": 6129
    },
    {
      "epoch": 2.263663220088626,
      "grad_norm": 0.25193148851394653,
      "learning_rate": 4.911934967360512e-05,
      "loss": 0.1721,
      "step": 6130
    },
    {
      "epoch": 2.264032496307238,
      "grad_norm": 0.2823428511619568,
      "learning_rate": 4.909471609804163e-05,
      "loss": 0.1756,
      "step": 6131
    },
    {
      "epoch": 2.2644017725258494,
      "grad_norm": 0.26386234164237976,
      "learning_rate": 4.907008252247814e-05,
      "loss": 0.1638,
      "step": 6132
    },
    {
      "epoch": 2.264771048744461,
      "grad_norm": 0.27456212043762207,
      "learning_rate": 4.9045448946914647e-05,
      "loss": 0.1479,
      "step": 6133
    },
    {
      "epoch": 2.265140324963072,
      "grad_norm": 0.29135406017303467,
      "learning_rate": 4.9020815371351154e-05,
      "loss": 0.1744,
      "step": 6134
    },
    {
      "epoch": 2.2655096011816838,
      "grad_norm": 0.2479601353406906,
      "learning_rate": 4.899618179578766e-05,
      "loss": 0.1544,
      "step": 6135
    },
    {
      "epoch": 2.2658788774002954,
      "grad_norm": 0.29454588890075684,
      "learning_rate": 4.897154822022417e-05,
      "loss": 0.1915,
      "step": 6136
    },
    {
      "epoch": 2.266248153618907,
      "grad_norm": 0.26884835958480835,
      "learning_rate": 4.894691464466067e-05,
      "loss": 0.1669,
      "step": 6137
    },
    {
      "epoch": 2.2666174298375186,
      "grad_norm": 0.3065447509288788,
      "learning_rate": 4.892228106909718e-05,
      "loss": 0.1798,
      "step": 6138
    },
    {
      "epoch": 2.26698670605613,
      "grad_norm": 0.28422340750694275,
      "learning_rate": 4.889764749353369e-05,
      "loss": 0.1703,
      "step": 6139
    },
    {
      "epoch": 2.2673559822747413,
      "grad_norm": 0.280860036611557,
      "learning_rate": 4.8873013917970195e-05,
      "loss": 0.159,
      "step": 6140
    },
    {
      "epoch": 2.267725258493353,
      "grad_norm": 0.24607494473457336,
      "learning_rate": 4.88483803424067e-05,
      "loss": 0.1894,
      "step": 6141
    },
    {
      "epoch": 2.2680945347119645,
      "grad_norm": 0.27865591645240784,
      "learning_rate": 4.882374676684321e-05,
      "loss": 0.1778,
      "step": 6142
    },
    {
      "epoch": 2.268463810930576,
      "grad_norm": 0.25750765204429626,
      "learning_rate": 4.879911319127972e-05,
      "loss": 0.1715,
      "step": 6143
    },
    {
      "epoch": 2.2688330871491877,
      "grad_norm": 0.27004292607307434,
      "learning_rate": 4.8774479615716226e-05,
      "loss": 0.1729,
      "step": 6144
    },
    {
      "epoch": 2.269202363367799,
      "grad_norm": 0.29048722982406616,
      "learning_rate": 4.874984604015273e-05,
      "loss": 0.1731,
      "step": 6145
    },
    {
      "epoch": 2.2695716395864105,
      "grad_norm": 0.23353642225265503,
      "learning_rate": 4.8725212464589235e-05,
      "loss": 0.1484,
      "step": 6146
    },
    {
      "epoch": 2.269940915805022,
      "grad_norm": 0.2740772068500519,
      "learning_rate": 4.870057888902574e-05,
      "loss": 0.186,
      "step": 6147
    },
    {
      "epoch": 2.2703101920236337,
      "grad_norm": 0.24230247735977173,
      "learning_rate": 4.867594531346225e-05,
      "loss": 0.1568,
      "step": 6148
    },
    {
      "epoch": 2.2706794682422453,
      "grad_norm": 0.23569877445697784,
      "learning_rate": 4.865131173789876e-05,
      "loss": 0.1614,
      "step": 6149
    },
    {
      "epoch": 2.271048744460857,
      "grad_norm": 0.2888610363006592,
      "learning_rate": 4.862667816233527e-05,
      "loss": 0.1862,
      "step": 6150
    },
    {
      "epoch": 2.271048744460857,
      "eval_loss": 0.25195202231407166,
      "eval_runtime": 5.8615,
      "eval_samples_per_second": 8.53,
      "eval_steps_per_second": 1.194,
      "step": 6150
    },
    {
      "epoch": 2.271418020679468,
      "grad_norm": 0.2675657272338867,
      "learning_rate": 4.8602044586771775e-05,
      "loss": 0.1789,
      "step": 6151
    },
    {
      "epoch": 2.2717872968980797,
      "grad_norm": 0.29622554779052734,
      "learning_rate": 4.857741101120828e-05,
      "loss": 0.1676,
      "step": 6152
    },
    {
      "epoch": 2.2721565731166913,
      "grad_norm": 0.23971012234687805,
      "learning_rate": 4.8552777435644783e-05,
      "loss": 0.1561,
      "step": 6153
    },
    {
      "epoch": 2.272525849335303,
      "grad_norm": 0.28567540645599365,
      "learning_rate": 4.852814386008129e-05,
      "loss": 0.1601,
      "step": 6154
    },
    {
      "epoch": 2.2728951255539145,
      "grad_norm": 0.28229108452796936,
      "learning_rate": 4.85035102845178e-05,
      "loss": 0.1641,
      "step": 6155
    },
    {
      "epoch": 2.2732644017725256,
      "grad_norm": 0.23102128505706787,
      "learning_rate": 4.847887670895431e-05,
      "loss": 0.1663,
      "step": 6156
    },
    {
      "epoch": 2.2736336779911372,
      "grad_norm": 0.28751540184020996,
      "learning_rate": 4.8454243133390815e-05,
      "loss": 0.1815,
      "step": 6157
    },
    {
      "epoch": 2.274002954209749,
      "grad_norm": 0.2625499963760376,
      "learning_rate": 4.842960955782732e-05,
      "loss": 0.1755,
      "step": 6158
    },
    {
      "epoch": 2.2743722304283605,
      "grad_norm": 0.3934956192970276,
      "learning_rate": 4.840497598226383e-05,
      "loss": 0.1736,
      "step": 6159
    },
    {
      "epoch": 2.274741506646972,
      "grad_norm": 0.3167099356651306,
      "learning_rate": 4.838034240670034e-05,
      "loss": 0.1881,
      "step": 6160
    },
    {
      "epoch": 2.2751107828655837,
      "grad_norm": 0.2751583158969879,
      "learning_rate": 4.835570883113684e-05,
      "loss": 0.1548,
      "step": 6161
    },
    {
      "epoch": 2.275480059084195,
      "grad_norm": 0.28156930208206177,
      "learning_rate": 4.833107525557335e-05,
      "loss": 0.182,
      "step": 6162
    },
    {
      "epoch": 2.2758493353028064,
      "grad_norm": 0.2643374502658844,
      "learning_rate": 4.8306441680009855e-05,
      "loss": 0.1917,
      "step": 6163
    },
    {
      "epoch": 2.276218611521418,
      "grad_norm": 0.20647065341472626,
      "learning_rate": 4.828180810444636e-05,
      "loss": 0.1459,
      "step": 6164
    },
    {
      "epoch": 2.2765878877400296,
      "grad_norm": 0.26006636023521423,
      "learning_rate": 4.825717452888287e-05,
      "loss": 0.1585,
      "step": 6165
    },
    {
      "epoch": 2.2769571639586412,
      "grad_norm": 0.22283130884170532,
      "learning_rate": 4.823254095331938e-05,
      "loss": 0.1454,
      "step": 6166
    },
    {
      "epoch": 2.2773264401772524,
      "grad_norm": 0.281406432390213,
      "learning_rate": 4.820790737775589e-05,
      "loss": 0.1745,
      "step": 6167
    },
    {
      "epoch": 2.277695716395864,
      "grad_norm": 0.276691734790802,
      "learning_rate": 4.8183273802192395e-05,
      "loss": 0.1617,
      "step": 6168
    },
    {
      "epoch": 2.2780649926144756,
      "grad_norm": 0.30159685015678406,
      "learning_rate": 4.8158640226628896e-05,
      "loss": 0.1844,
      "step": 6169
    },
    {
      "epoch": 2.278434268833087,
      "grad_norm": 0.2677542269229889,
      "learning_rate": 4.8134006651065404e-05,
      "loss": 0.1617,
      "step": 6170
    },
    {
      "epoch": 2.278803545051699,
      "grad_norm": 0.28805914521217346,
      "learning_rate": 4.810937307550191e-05,
      "loss": 0.1471,
      "step": 6171
    },
    {
      "epoch": 2.2791728212703104,
      "grad_norm": 0.33055534958839417,
      "learning_rate": 4.808473949993842e-05,
      "loss": 0.1987,
      "step": 6172
    },
    {
      "epoch": 2.2795420974889216,
      "grad_norm": 0.5852456092834473,
      "learning_rate": 4.806010592437493e-05,
      "loss": 0.1839,
      "step": 6173
    },
    {
      "epoch": 2.279911373707533,
      "grad_norm": 0.2516915798187256,
      "learning_rate": 4.8035472348811435e-05,
      "loss": 0.1661,
      "step": 6174
    },
    {
      "epoch": 2.2802806499261448,
      "grad_norm": 0.2683897614479065,
      "learning_rate": 4.801083877324794e-05,
      "loss": 0.1737,
      "step": 6175
    },
    {
      "epoch": 2.2806499261447564,
      "grad_norm": 0.3504439890384674,
      "learning_rate": 4.7986205197684444e-05,
      "loss": 0.1915,
      "step": 6176
    },
    {
      "epoch": 2.281019202363368,
      "grad_norm": 0.26182207465171814,
      "learning_rate": 4.796157162212095e-05,
      "loss": 0.1838,
      "step": 6177
    },
    {
      "epoch": 2.281388478581979,
      "grad_norm": 0.25246095657348633,
      "learning_rate": 4.793693804655746e-05,
      "loss": 0.164,
      "step": 6178
    },
    {
      "epoch": 2.2817577548005907,
      "grad_norm": 0.22957351803779602,
      "learning_rate": 4.791230447099397e-05,
      "loss": 0.1564,
      "step": 6179
    },
    {
      "epoch": 2.2821270310192023,
      "grad_norm": 0.22464561462402344,
      "learning_rate": 4.7887670895430475e-05,
      "loss": 0.1524,
      "step": 6180
    },
    {
      "epoch": 2.282496307237814,
      "grad_norm": 0.26672062277793884,
      "learning_rate": 4.786303731986698e-05,
      "loss": 0.1687,
      "step": 6181
    },
    {
      "epoch": 2.2828655834564255,
      "grad_norm": 0.28550687432289124,
      "learning_rate": 4.783840374430349e-05,
      "loss": 0.1827,
      "step": 6182
    },
    {
      "epoch": 2.283234859675037,
      "grad_norm": 0.28829067945480347,
      "learning_rate": 4.781377016874e-05,
      "loss": 0.1431,
      "step": 6183
    },
    {
      "epoch": 2.2836041358936483,
      "grad_norm": 0.21833986043930054,
      "learning_rate": 4.77891365931765e-05,
      "loss": 0.1484,
      "step": 6184
    },
    {
      "epoch": 2.28397341211226,
      "grad_norm": 0.2626877725124359,
      "learning_rate": 4.776450301761301e-05,
      "loss": 0.1972,
      "step": 6185
    },
    {
      "epoch": 2.2843426883308715,
      "grad_norm": 0.24253219366073608,
      "learning_rate": 4.7739869442049516e-05,
      "loss": 0.1679,
      "step": 6186
    },
    {
      "epoch": 2.284711964549483,
      "grad_norm": 0.27723631262779236,
      "learning_rate": 4.7715235866486024e-05,
      "loss": 0.1592,
      "step": 6187
    },
    {
      "epoch": 2.2850812407680947,
      "grad_norm": 0.27834993600845337,
      "learning_rate": 4.769060229092253e-05,
      "loss": 0.1805,
      "step": 6188
    },
    {
      "epoch": 2.285450516986706,
      "grad_norm": 0.2531612515449524,
      "learning_rate": 4.766596871535904e-05,
      "loss": 0.1718,
      "step": 6189
    },
    {
      "epoch": 2.2858197932053175,
      "grad_norm": 0.2874826490879059,
      "learning_rate": 4.764133513979555e-05,
      "loss": 0.1774,
      "step": 6190
    },
    {
      "epoch": 2.286189069423929,
      "grad_norm": 0.3089540898799896,
      "learning_rate": 4.7616701564232055e-05,
      "loss": 0.1838,
      "step": 6191
    },
    {
      "epoch": 2.2865583456425407,
      "grad_norm": 0.2890220880508423,
      "learning_rate": 4.7592067988668556e-05,
      "loss": 0.1656,
      "step": 6192
    },
    {
      "epoch": 2.2869276218611523,
      "grad_norm": 0.27009257674217224,
      "learning_rate": 4.7567434413105064e-05,
      "loss": 0.174,
      "step": 6193
    },
    {
      "epoch": 2.287296898079764,
      "grad_norm": 0.2783883512020111,
      "learning_rate": 4.754280083754157e-05,
      "loss": 0.1623,
      "step": 6194
    },
    {
      "epoch": 2.287666174298375,
      "grad_norm": 0.2568540871143341,
      "learning_rate": 4.751816726197808e-05,
      "loss": 0.159,
      "step": 6195
    },
    {
      "epoch": 2.2880354505169866,
      "grad_norm": 0.32755327224731445,
      "learning_rate": 4.749353368641459e-05,
      "loss": 0.1919,
      "step": 6196
    },
    {
      "epoch": 2.2884047267355982,
      "grad_norm": 0.2393258959054947,
      "learning_rate": 4.7468900110851096e-05,
      "loss": 0.1569,
      "step": 6197
    },
    {
      "epoch": 2.28877400295421,
      "grad_norm": 0.3174670338630676,
      "learning_rate": 4.7444266535287603e-05,
      "loss": 0.1557,
      "step": 6198
    },
    {
      "epoch": 2.2891432791728215,
      "grad_norm": 0.2505471706390381,
      "learning_rate": 4.741963295972411e-05,
      "loss": 0.1769,
      "step": 6199
    },
    {
      "epoch": 2.2895125553914326,
      "grad_norm": 0.3370268642902374,
      "learning_rate": 4.739499938416061e-05,
      "loss": 0.1755,
      "step": 6200
    },
    {
      "epoch": 2.2895125553914326,
      "eval_loss": 0.2525298297405243,
      "eval_runtime": 5.8482,
      "eval_samples_per_second": 8.55,
      "eval_steps_per_second": 1.197,
      "step": 6200
    },
    {
      "epoch": 2.289881831610044,
      "grad_norm": 0.24336397647857666,
      "learning_rate": 4.737036580859712e-05,
      "loss": 0.1814,
      "step": 6201
    },
    {
      "epoch": 2.290251107828656,
      "grad_norm": 0.3346516788005829,
      "learning_rate": 4.734573223303363e-05,
      "loss": 0.1619,
      "step": 6202
    },
    {
      "epoch": 2.2906203840472674,
      "grad_norm": 0.2932000756263733,
      "learning_rate": 4.7321098657470136e-05,
      "loss": 0.1747,
      "step": 6203
    },
    {
      "epoch": 2.290989660265879,
      "grad_norm": 0.23745404183864594,
      "learning_rate": 4.7296465081906644e-05,
      "loss": 0.1513,
      "step": 6204
    },
    {
      "epoch": 2.2913589364844906,
      "grad_norm": 0.23634615540504456,
      "learning_rate": 4.727183150634315e-05,
      "loss": 0.1635,
      "step": 6205
    },
    {
      "epoch": 2.291728212703102,
      "grad_norm": 0.30578646063804626,
      "learning_rate": 4.724719793077966e-05,
      "loss": 0.1649,
      "step": 6206
    },
    {
      "epoch": 2.2920974889217134,
      "grad_norm": 0.26579129695892334,
      "learning_rate": 4.722256435521617e-05,
      "loss": 0.171,
      "step": 6207
    },
    {
      "epoch": 2.292466765140325,
      "grad_norm": 0.2875668406486511,
      "learning_rate": 4.719793077965267e-05,
      "loss": 0.1525,
      "step": 6208
    },
    {
      "epoch": 2.2928360413589366,
      "grad_norm": 0.26967793703079224,
      "learning_rate": 4.7173297204089176e-05,
      "loss": 0.1565,
      "step": 6209
    },
    {
      "epoch": 2.293205317577548,
      "grad_norm": 0.3409324884414673,
      "learning_rate": 4.7148663628525684e-05,
      "loss": 0.1633,
      "step": 6210
    },
    {
      "epoch": 2.2935745937961594,
      "grad_norm": 0.2611065208911896,
      "learning_rate": 4.7124030052962185e-05,
      "loss": 0.1584,
      "step": 6211
    },
    {
      "epoch": 2.293943870014771,
      "grad_norm": 0.2830992639064789,
      "learning_rate": 4.709939647739869e-05,
      "loss": 0.1608,
      "step": 6212
    },
    {
      "epoch": 2.2943131462333826,
      "grad_norm": 0.2753259539604187,
      "learning_rate": 4.70747629018352e-05,
      "loss": 0.145,
      "step": 6213
    },
    {
      "epoch": 2.294682422451994,
      "grad_norm": 0.3336687386035919,
      "learning_rate": 4.705012932627171e-05,
      "loss": 0.2059,
      "step": 6214
    },
    {
      "epoch": 2.2950516986706058,
      "grad_norm": 0.2711421549320221,
      "learning_rate": 4.702549575070822e-05,
      "loss": 0.1802,
      "step": 6215
    },
    {
      "epoch": 2.2954209748892174,
      "grad_norm": 0.28786736726760864,
      "learning_rate": 4.7000862175144725e-05,
      "loss": 0.1621,
      "step": 6216
    },
    {
      "epoch": 2.2957902511078285,
      "grad_norm": 0.27143457531929016,
      "learning_rate": 4.6976228599581226e-05,
      "loss": 0.1553,
      "step": 6217
    },
    {
      "epoch": 2.29615952732644,
      "grad_norm": 0.27764955163002014,
      "learning_rate": 4.6951595024017734e-05,
      "loss": 0.1783,
      "step": 6218
    },
    {
      "epoch": 2.2965288035450517,
      "grad_norm": 0.2730041742324829,
      "learning_rate": 4.692696144845424e-05,
      "loss": 0.1578,
      "step": 6219
    },
    {
      "epoch": 2.2968980797636633,
      "grad_norm": 0.20962485671043396,
      "learning_rate": 4.690232787289075e-05,
      "loss": 0.1599,
      "step": 6220
    },
    {
      "epoch": 2.2972673559822745,
      "grad_norm": 0.3561626076698303,
      "learning_rate": 4.687769429732726e-05,
      "loss": 0.2005,
      "step": 6221
    },
    {
      "epoch": 2.297636632200886,
      "grad_norm": 0.4217565953731537,
      "learning_rate": 4.6853060721763765e-05,
      "loss": 0.1888,
      "step": 6222
    },
    {
      "epoch": 2.2980059084194977,
      "grad_norm": 0.31085893511772156,
      "learning_rate": 4.682842714620027e-05,
      "loss": 0.1546,
      "step": 6223
    },
    {
      "epoch": 2.2983751846381093,
      "grad_norm": 0.2842569947242737,
      "learning_rate": 4.680379357063678e-05,
      "loss": 0.1618,
      "step": 6224
    },
    {
      "epoch": 2.298744460856721,
      "grad_norm": 0.30500495433807373,
      "learning_rate": 4.677915999507328e-05,
      "loss": 0.1971,
      "step": 6225
    },
    {
      "epoch": 2.2991137370753325,
      "grad_norm": 0.2780814468860626,
      "learning_rate": 4.675452641950979e-05,
      "loss": 0.1876,
      "step": 6226
    },
    {
      "epoch": 2.299483013293944,
      "grad_norm": 0.2767498791217804,
      "learning_rate": 4.67298928439463e-05,
      "loss": 0.1827,
      "step": 6227
    },
    {
      "epoch": 2.2998522895125553,
      "grad_norm": 0.2901836633682251,
      "learning_rate": 4.6705259268382805e-05,
      "loss": 0.1693,
      "step": 6228
    },
    {
      "epoch": 2.300221565731167,
      "grad_norm": 0.2251076102256775,
      "learning_rate": 4.668062569281931e-05,
      "loss": 0.1655,
      "step": 6229
    },
    {
      "epoch": 2.3005908419497785,
      "grad_norm": 0.30336886644363403,
      "learning_rate": 4.665599211725582e-05,
      "loss": 0.2055,
      "step": 6230
    },
    {
      "epoch": 2.30096011816839,
      "grad_norm": 0.2422707974910736,
      "learning_rate": 4.663135854169233e-05,
      "loss": 0.1484,
      "step": 6231
    },
    {
      "epoch": 2.3013293943870012,
      "grad_norm": 0.2562599182128906,
      "learning_rate": 4.660672496612884e-05,
      "loss": 0.1779,
      "step": 6232
    },
    {
      "epoch": 2.301698670605613,
      "grad_norm": 0.24225102365016937,
      "learning_rate": 4.658209139056534e-05,
      "loss": 0.1754,
      "step": 6233
    },
    {
      "epoch": 2.3020679468242244,
      "grad_norm": 0.32815083861351013,
      "learning_rate": 4.6557457815001846e-05,
      "loss": 0.1776,
      "step": 6234
    },
    {
      "epoch": 2.302437223042836,
      "grad_norm": 0.2695056200027466,
      "learning_rate": 4.6532824239438354e-05,
      "loss": 0.1567,
      "step": 6235
    },
    {
      "epoch": 2.3028064992614476,
      "grad_norm": 0.26233479380607605,
      "learning_rate": 4.650819066387486e-05,
      "loss": 0.1554,
      "step": 6236
    },
    {
      "epoch": 2.3031757754800593,
      "grad_norm": 0.31833940744400024,
      "learning_rate": 4.648355708831137e-05,
      "loss": 0.1921,
      "step": 6237
    },
    {
      "epoch": 2.303545051698671,
      "grad_norm": 0.2691422700881958,
      "learning_rate": 4.645892351274788e-05,
      "loss": 0.1708,
      "step": 6238
    },
    {
      "epoch": 2.303914327917282,
      "grad_norm": 0.2862132489681244,
      "learning_rate": 4.6434289937184385e-05,
      "loss": 0.144,
      "step": 6239
    },
    {
      "epoch": 2.3042836041358936,
      "grad_norm": 0.2821369767189026,
      "learning_rate": 4.640965636162089e-05,
      "loss": 0.1917,
      "step": 6240
    },
    {
      "epoch": 2.304652880354505,
      "grad_norm": 0.4799966514110565,
      "learning_rate": 4.6385022786057394e-05,
      "loss": 0.183,
      "step": 6241
    },
    {
      "epoch": 2.305022156573117,
      "grad_norm": 0.32981500029563904,
      "learning_rate": 4.63603892104939e-05,
      "loss": 0.1887,
      "step": 6242
    },
    {
      "epoch": 2.305391432791728,
      "grad_norm": 0.2521057724952698,
      "learning_rate": 4.633575563493041e-05,
      "loss": 0.181,
      "step": 6243
    },
    {
      "epoch": 2.3057607090103396,
      "grad_norm": 0.2086019515991211,
      "learning_rate": 4.631112205936692e-05,
      "loss": 0.1515,
      "step": 6244
    },
    {
      "epoch": 2.306129985228951,
      "grad_norm": 0.2835588753223419,
      "learning_rate": 4.6286488483803426e-05,
      "loss": 0.1608,
      "step": 6245
    },
    {
      "epoch": 2.306499261447563,
      "grad_norm": 0.22256755828857422,
      "learning_rate": 4.6261854908239933e-05,
      "loss": 0.1443,
      "step": 6246
    },
    {
      "epoch": 2.3068685376661744,
      "grad_norm": 0.2678048312664032,
      "learning_rate": 4.623722133267644e-05,
      "loss": 0.1637,
      "step": 6247
    },
    {
      "epoch": 2.307237813884786,
      "grad_norm": 0.2963012754917145,
      "learning_rate": 4.621258775711295e-05,
      "loss": 0.177,
      "step": 6248
    },
    {
      "epoch": 2.307607090103397,
      "grad_norm": 0.28620731830596924,
      "learning_rate": 4.618795418154945e-05,
      "loss": 0.1728,
      "step": 6249
    },
    {
      "epoch": 2.3079763663220088,
      "grad_norm": 0.2526327073574066,
      "learning_rate": 4.616332060598596e-05,
      "loss": 0.1739,
      "step": 6250
    },
    {
      "epoch": 2.3079763663220088,
      "eval_loss": 0.25240325927734375,
      "eval_runtime": 5.8578,
      "eval_samples_per_second": 8.536,
      "eval_steps_per_second": 1.195,
      "step": 6250
    },
    {
      "epoch": 2.3083456425406204,
      "grad_norm": 0.22743669152259827,
      "learning_rate": 4.6138687030422466e-05,
      "loss": 0.1491,
      "step": 6251
    },
    {
      "epoch": 2.308714918759232,
      "grad_norm": 0.26746290922164917,
      "learning_rate": 4.6114053454858974e-05,
      "loss": 0.1698,
      "step": 6252
    },
    {
      "epoch": 2.3090841949778436,
      "grad_norm": 0.31007710099220276,
      "learning_rate": 4.608941987929548e-05,
      "loss": 0.1797,
      "step": 6253
    },
    {
      "epoch": 2.3094534711964547,
      "grad_norm": 0.23614712059497833,
      "learning_rate": 4.606478630373199e-05,
      "loss": 0.1617,
      "step": 6254
    },
    {
      "epoch": 2.3098227474150663,
      "grad_norm": 0.29411062598228455,
      "learning_rate": 4.60401527281685e-05,
      "loss": 0.1738,
      "step": 6255
    },
    {
      "epoch": 2.310192023633678,
      "grad_norm": 0.27511805295944214,
      "learning_rate": 4.6015519152605005e-05,
      "loss": 0.1942,
      "step": 6256
    },
    {
      "epoch": 2.3105612998522895,
      "grad_norm": 0.2956949472427368,
      "learning_rate": 4.5990885577041506e-05,
      "loss": 0.1611,
      "step": 6257
    },
    {
      "epoch": 2.310930576070901,
      "grad_norm": 0.26745113730430603,
      "learning_rate": 4.5966252001478014e-05,
      "loss": 0.1787,
      "step": 6258
    },
    {
      "epoch": 2.3112998522895127,
      "grad_norm": 0.3227623999118805,
      "learning_rate": 4.594161842591452e-05,
      "loss": 0.1649,
      "step": 6259
    },
    {
      "epoch": 2.311669128508124,
      "grad_norm": 0.2465844452381134,
      "learning_rate": 4.591698485035103e-05,
      "loss": 0.1651,
      "step": 6260
    },
    {
      "epoch": 2.3120384047267355,
      "grad_norm": 0.33145004510879517,
      "learning_rate": 4.589235127478754e-05,
      "loss": 0.1713,
      "step": 6261
    },
    {
      "epoch": 2.312407680945347,
      "grad_norm": 0.2592099606990814,
      "learning_rate": 4.5867717699224046e-05,
      "loss": 0.1615,
      "step": 6262
    },
    {
      "epoch": 2.3127769571639587,
      "grad_norm": 0.3428521454334259,
      "learning_rate": 4.5843084123660554e-05,
      "loss": 0.1756,
      "step": 6263
    },
    {
      "epoch": 2.3131462333825703,
      "grad_norm": 0.23452892899513245,
      "learning_rate": 4.581845054809706e-05,
      "loss": 0.1468,
      "step": 6264
    },
    {
      "epoch": 2.3135155096011815,
      "grad_norm": 0.2303593009710312,
      "learning_rate": 4.579381697253356e-05,
      "loss": 0.1649,
      "step": 6265
    },
    {
      "epoch": 2.313884785819793,
      "grad_norm": 0.3098956048488617,
      "learning_rate": 4.576918339697007e-05,
      "loss": 0.2381,
      "step": 6266
    },
    {
      "epoch": 2.3142540620384047,
      "grad_norm": 0.28998488187789917,
      "learning_rate": 4.574454982140658e-05,
      "loss": 0.1796,
      "step": 6267
    },
    {
      "epoch": 2.3146233382570163,
      "grad_norm": 0.32793858647346497,
      "learning_rate": 4.5719916245843086e-05,
      "loss": 0.2072,
      "step": 6268
    },
    {
      "epoch": 2.314992614475628,
      "grad_norm": 0.2523498833179474,
      "learning_rate": 4.5695282670279594e-05,
      "loss": 0.1693,
      "step": 6269
    },
    {
      "epoch": 2.3153618906942395,
      "grad_norm": 0.2589319050312042,
      "learning_rate": 4.56706490947161e-05,
      "loss": 0.1561,
      "step": 6270
    },
    {
      "epoch": 2.3157311669128506,
      "grad_norm": 0.24488919973373413,
      "learning_rate": 4.564601551915261e-05,
      "loss": 0.1719,
      "step": 6271
    },
    {
      "epoch": 2.3161004431314622,
      "grad_norm": 0.28501635789871216,
      "learning_rate": 4.562138194358912e-05,
      "loss": 0.1895,
      "step": 6272
    },
    {
      "epoch": 2.316469719350074,
      "grad_norm": 0.24768377840518951,
      "learning_rate": 4.559674836802562e-05,
      "loss": 0.1545,
      "step": 6273
    },
    {
      "epoch": 2.3168389955686854,
      "grad_norm": 0.33171817660331726,
      "learning_rate": 4.5572114792462127e-05,
      "loss": 0.1704,
      "step": 6274
    },
    {
      "epoch": 2.317208271787297,
      "grad_norm": 0.3058539628982544,
      "learning_rate": 4.5547481216898634e-05,
      "loss": 0.1707,
      "step": 6275
    },
    {
      "epoch": 2.317577548005908,
      "grad_norm": 0.2971458435058594,
      "learning_rate": 4.552284764133514e-05,
      "loss": 0.2085,
      "step": 6276
    },
    {
      "epoch": 2.31794682422452,
      "grad_norm": 0.33643579483032227,
      "learning_rate": 4.549821406577165e-05,
      "loss": 0.193,
      "step": 6277
    },
    {
      "epoch": 2.3183161004431314,
      "grad_norm": 0.26456019282341003,
      "learning_rate": 4.547358049020816e-05,
      "loss": 0.171,
      "step": 6278
    },
    {
      "epoch": 2.318685376661743,
      "grad_norm": 0.287834107875824,
      "learning_rate": 4.5448946914644666e-05,
      "loss": 0.1846,
      "step": 6279
    },
    {
      "epoch": 2.3190546528803546,
      "grad_norm": 0.36195677518844604,
      "learning_rate": 4.5424313339081174e-05,
      "loss": 0.173,
      "step": 6280
    },
    {
      "epoch": 2.319423929098966,
      "grad_norm": 0.27335676550865173,
      "learning_rate": 4.5399679763517675e-05,
      "loss": 0.1781,
      "step": 6281
    },
    {
      "epoch": 2.3197932053175774,
      "grad_norm": 0.21281443536281586,
      "learning_rate": 4.537504618795418e-05,
      "loss": 0.1551,
      "step": 6282
    },
    {
      "epoch": 2.320162481536189,
      "grad_norm": 0.30202749371528625,
      "learning_rate": 4.535041261239069e-05,
      "loss": 0.1994,
      "step": 6283
    },
    {
      "epoch": 2.3205317577548006,
      "grad_norm": 0.2695281207561493,
      "learning_rate": 4.53257790368272e-05,
      "loss": 0.1846,
      "step": 6284
    },
    {
      "epoch": 2.320901033973412,
      "grad_norm": 0.25889331102371216,
      "learning_rate": 4.5301145461263706e-05,
      "loss": 0.1472,
      "step": 6285
    },
    {
      "epoch": 2.321270310192024,
      "grad_norm": 0.30311331152915955,
      "learning_rate": 4.5276511885700214e-05,
      "loss": 0.1949,
      "step": 6286
    },
    {
      "epoch": 2.321639586410635,
      "grad_norm": 0.2517571449279785,
      "learning_rate": 4.525187831013672e-05,
      "loss": 0.1459,
      "step": 6287
    },
    {
      "epoch": 2.3220088626292466,
      "grad_norm": 0.28349751234054565,
      "learning_rate": 4.522724473457322e-05,
      "loss": 0.1683,
      "step": 6288
    },
    {
      "epoch": 2.322378138847858,
      "grad_norm": 0.3386727273464203,
      "learning_rate": 4.520261115900973e-05,
      "loss": 0.1611,
      "step": 6289
    },
    {
      "epoch": 2.3227474150664698,
      "grad_norm": 0.2999275326728821,
      "learning_rate": 4.517797758344624e-05,
      "loss": 0.1829,
      "step": 6290
    },
    {
      "epoch": 2.3231166912850814,
      "grad_norm": 0.2655416429042816,
      "learning_rate": 4.515334400788275e-05,
      "loss": 0.191,
      "step": 6291
    },
    {
      "epoch": 2.323485967503693,
      "grad_norm": 0.2688995897769928,
      "learning_rate": 4.5128710432319254e-05,
      "loss": 0.1574,
      "step": 6292
    },
    {
      "epoch": 2.323855243722304,
      "grad_norm": 0.30712631344795227,
      "learning_rate": 4.510407685675576e-05,
      "loss": 0.184,
      "step": 6293
    },
    {
      "epoch": 2.3242245199409157,
      "grad_norm": 0.29097869992256165,
      "learning_rate": 4.507944328119227e-05,
      "loss": 0.1767,
      "step": 6294
    },
    {
      "epoch": 2.3245937961595273,
      "grad_norm": 0.2865394651889801,
      "learning_rate": 4.505480970562878e-05,
      "loss": 0.189,
      "step": 6295
    },
    {
      "epoch": 2.324963072378139,
      "grad_norm": 0.25940632820129395,
      "learning_rate": 4.503017613006528e-05,
      "loss": 0.1749,
      "step": 6296
    },
    {
      "epoch": 2.3253323485967505,
      "grad_norm": 0.3576686382293701,
      "learning_rate": 4.500554255450179e-05,
      "loss": 0.2159,
      "step": 6297
    },
    {
      "epoch": 2.3257016248153617,
      "grad_norm": 0.27279672026634216,
      "learning_rate": 4.4980908978938295e-05,
      "loss": 0.1692,
      "step": 6298
    },
    {
      "epoch": 2.3260709010339733,
      "grad_norm": 0.30613669753074646,
      "learning_rate": 4.49562754033748e-05,
      "loss": 0.1649,
      "step": 6299
    },
    {
      "epoch": 2.326440177252585,
      "grad_norm": 0.24356256425380707,
      "learning_rate": 4.493164182781131e-05,
      "loss": 0.1635,
      "step": 6300
    },
    {
      "epoch": 2.326440177252585,
      "eval_loss": 0.2532932758331299,
      "eval_runtime": 5.8654,
      "eval_samples_per_second": 8.525,
      "eval_steps_per_second": 1.193,
      "step": 6300
    },
    {
      "epoch": 2.3268094534711965,
      "grad_norm": 0.23789072036743164,
      "learning_rate": 4.490700825224782e-05,
      "loss": 0.1688,
      "step": 6301
    },
    {
      "epoch": 2.327178729689808,
      "grad_norm": 0.2563260793685913,
      "learning_rate": 4.4882374676684326e-05,
      "loss": 0.1625,
      "step": 6302
    },
    {
      "epoch": 2.3275480059084197,
      "grad_norm": 0.22482311725616455,
      "learning_rate": 4.4857741101120834e-05,
      "loss": 0.162,
      "step": 6303
    },
    {
      "epoch": 2.327917282127031,
      "grad_norm": 0.2803346812725067,
      "learning_rate": 4.4833107525557335e-05,
      "loss": 0.1773,
      "step": 6304
    },
    {
      "epoch": 2.3282865583456425,
      "grad_norm": 0.2905280292034149,
      "learning_rate": 4.480847394999384e-05,
      "loss": 0.1988,
      "step": 6305
    },
    {
      "epoch": 2.328655834564254,
      "grad_norm": 0.3134540617465973,
      "learning_rate": 4.478384037443035e-05,
      "loss": 0.1677,
      "step": 6306
    },
    {
      "epoch": 2.3290251107828657,
      "grad_norm": 0.2726571261882782,
      "learning_rate": 4.475920679886686e-05,
      "loss": 0.1652,
      "step": 6307
    },
    {
      "epoch": 2.3293943870014773,
      "grad_norm": 0.2989545166492462,
      "learning_rate": 4.473457322330337e-05,
      "loss": 0.1719,
      "step": 6308
    },
    {
      "epoch": 2.3297636632200884,
      "grad_norm": 0.3618376851081848,
      "learning_rate": 4.4709939647739875e-05,
      "loss": 0.2088,
      "step": 6309
    },
    {
      "epoch": 2.3301329394387,
      "grad_norm": 0.28249844908714294,
      "learning_rate": 4.468530607217638e-05,
      "loss": 0.1671,
      "step": 6310
    },
    {
      "epoch": 2.3305022156573116,
      "grad_norm": 0.25478148460388184,
      "learning_rate": 4.466067249661289e-05,
      "loss": 0.1461,
      "step": 6311
    },
    {
      "epoch": 2.3308714918759232,
      "grad_norm": 0.2577178180217743,
      "learning_rate": 4.463603892104939e-05,
      "loss": 0.1387,
      "step": 6312
    },
    {
      "epoch": 2.331240768094535,
      "grad_norm": 0.24544434249401093,
      "learning_rate": 4.46114053454859e-05,
      "loss": 0.1641,
      "step": 6313
    },
    {
      "epoch": 2.3316100443131464,
      "grad_norm": 0.31728222966194153,
      "learning_rate": 4.458677176992241e-05,
      "loss": 0.1959,
      "step": 6314
    },
    {
      "epoch": 2.3319793205317576,
      "grad_norm": 0.27827325463294983,
      "learning_rate": 4.4562138194358915e-05,
      "loss": 0.1544,
      "step": 6315
    },
    {
      "epoch": 2.332348596750369,
      "grad_norm": 0.2744172513484955,
      "learning_rate": 4.453750461879542e-05,
      "loss": 0.1644,
      "step": 6316
    },
    {
      "epoch": 2.332717872968981,
      "grad_norm": 0.2671150267124176,
      "learning_rate": 4.451287104323193e-05,
      "loss": 0.1753,
      "step": 6317
    },
    {
      "epoch": 2.3330871491875924,
      "grad_norm": 0.2949393093585968,
      "learning_rate": 4.448823746766844e-05,
      "loss": 0.1829,
      "step": 6318
    },
    {
      "epoch": 2.333456425406204,
      "grad_norm": 0.3001161515712738,
      "learning_rate": 4.4463603892104946e-05,
      "loss": 0.1976,
      "step": 6319
    },
    {
      "epoch": 2.333825701624815,
      "grad_norm": 0.327101469039917,
      "learning_rate": 4.443897031654145e-05,
      "loss": 0.1831,
      "step": 6320
    },
    {
      "epoch": 2.3341949778434268,
      "grad_norm": 0.2728175222873688,
      "learning_rate": 4.4414336740977955e-05,
      "loss": 0.1485,
      "step": 6321
    },
    {
      "epoch": 2.3345642540620384,
      "grad_norm": 0.303019642829895,
      "learning_rate": 4.438970316541446e-05,
      "loss": 0.1778,
      "step": 6322
    },
    {
      "epoch": 2.33493353028065,
      "grad_norm": 0.2969949543476105,
      "learning_rate": 4.436506958985097e-05,
      "loss": 0.1812,
      "step": 6323
    },
    {
      "epoch": 2.3353028064992616,
      "grad_norm": 0.2519605755805969,
      "learning_rate": 4.434043601428748e-05,
      "loss": 0.1629,
      "step": 6324
    },
    {
      "epoch": 2.335672082717873,
      "grad_norm": 0.2607879340648651,
      "learning_rate": 4.431580243872399e-05,
      "loss": 0.1566,
      "step": 6325
    },
    {
      "epoch": 2.3360413589364843,
      "grad_norm": 0.21427521109580994,
      "learning_rate": 4.4291168863160495e-05,
      "loss": 0.1424,
      "step": 6326
    },
    {
      "epoch": 2.336410635155096,
      "grad_norm": 0.2910623550415039,
      "learning_rate": 4.4266535287596996e-05,
      "loss": 0.1723,
      "step": 6327
    },
    {
      "epoch": 2.3367799113737076,
      "grad_norm": 0.24531933665275574,
      "learning_rate": 4.4241901712033504e-05,
      "loss": 0.1562,
      "step": 6328
    },
    {
      "epoch": 2.337149187592319,
      "grad_norm": 0.2238311618566513,
      "learning_rate": 4.4217268136470005e-05,
      "loss": 0.134,
      "step": 6329
    },
    {
      "epoch": 2.3375184638109308,
      "grad_norm": 0.2861112356185913,
      "learning_rate": 4.419263456090651e-05,
      "loss": 0.1606,
      "step": 6330
    },
    {
      "epoch": 2.337887740029542,
      "grad_norm": 0.24773359298706055,
      "learning_rate": 4.416800098534302e-05,
      "loss": 0.1816,
      "step": 6331
    },
    {
      "epoch": 2.3382570162481535,
      "grad_norm": 0.22608470916748047,
      "learning_rate": 4.414336740977953e-05,
      "loss": 0.1573,
      "step": 6332
    },
    {
      "epoch": 2.338626292466765,
      "grad_norm": 0.2583619952201843,
      "learning_rate": 4.4118733834216036e-05,
      "loss": 0.1374,
      "step": 6333
    },
    {
      "epoch": 2.3389955686853767,
      "grad_norm": 0.29897114634513855,
      "learning_rate": 4.4094100258652544e-05,
      "loss": 0.1787,
      "step": 6334
    },
    {
      "epoch": 2.3393648449039883,
      "grad_norm": 0.2301228940486908,
      "learning_rate": 4.406946668308905e-05,
      "loss": 0.1517,
      "step": 6335
    },
    {
      "epoch": 2.3397341211226,
      "grad_norm": 0.2524890601634979,
      "learning_rate": 4.404483310752556e-05,
      "loss": 0.172,
      "step": 6336
    },
    {
      "epoch": 2.340103397341211,
      "grad_norm": 0.25114837288856506,
      "learning_rate": 4.402019953196206e-05,
      "loss": 0.1762,
      "step": 6337
    },
    {
      "epoch": 2.3404726735598227,
      "grad_norm": 0.2742963135242462,
      "learning_rate": 4.399556595639857e-05,
      "loss": 0.1658,
      "step": 6338
    },
    {
      "epoch": 2.3408419497784343,
      "grad_norm": 0.24793750047683716,
      "learning_rate": 4.397093238083508e-05,
      "loss": 0.1616,
      "step": 6339
    },
    {
      "epoch": 2.341211225997046,
      "grad_norm": 0.29155686497688293,
      "learning_rate": 4.3946298805271585e-05,
      "loss": 0.156,
      "step": 6340
    },
    {
      "epoch": 2.3415805022156575,
      "grad_norm": 0.2763091027736664,
      "learning_rate": 4.392166522970809e-05,
      "loss": 0.181,
      "step": 6341
    },
    {
      "epoch": 2.3419497784342687,
      "grad_norm": 0.3174024224281311,
      "learning_rate": 4.38970316541446e-05,
      "loss": 0.1653,
      "step": 6342
    },
    {
      "epoch": 2.3423190546528803,
      "grad_norm": 0.28082993626594543,
      "learning_rate": 4.387239807858111e-05,
      "loss": 0.1505,
      "step": 6343
    },
    {
      "epoch": 2.342688330871492,
      "grad_norm": 0.247702494263649,
      "learning_rate": 4.3847764503017616e-05,
      "loss": 0.1567,
      "step": 6344
    },
    {
      "epoch": 2.3430576070901035,
      "grad_norm": 0.3166281282901764,
      "learning_rate": 4.382313092745412e-05,
      "loss": 0.1635,
      "step": 6345
    },
    {
      "epoch": 2.343426883308715,
      "grad_norm": 0.32806655764579773,
      "learning_rate": 4.3798497351890625e-05,
      "loss": 0.1911,
      "step": 6346
    },
    {
      "epoch": 2.3437961595273267,
      "grad_norm": 0.2769249379634857,
      "learning_rate": 4.377386377632713e-05,
      "loss": 0.1724,
      "step": 6347
    },
    {
      "epoch": 2.344165435745938,
      "grad_norm": 0.2766216993331909,
      "learning_rate": 4.374923020076364e-05,
      "loss": 0.173,
      "step": 6348
    },
    {
      "epoch": 2.3445347119645494,
      "grad_norm": 0.2586168944835663,
      "learning_rate": 4.372459662520015e-05,
      "loss": 0.1417,
      "step": 6349
    },
    {
      "epoch": 2.344903988183161,
      "grad_norm": 0.2673698961734772,
      "learning_rate": 4.3699963049636656e-05,
      "loss": 0.177,
      "step": 6350
    },
    {
      "epoch": 2.344903988183161,
      "eval_loss": 0.2545863091945648,
      "eval_runtime": 5.8601,
      "eval_samples_per_second": 8.532,
      "eval_steps_per_second": 1.195,
      "step": 6350
    },
    {
      "epoch": 2.3452732644017726,
      "grad_norm": 0.29882508516311646,
      "learning_rate": 4.3675329474073164e-05,
      "loss": 0.162,
      "step": 6351
    },
    {
      "epoch": 2.345642540620384,
      "grad_norm": 0.24953065812587738,
      "learning_rate": 4.365069589850967e-05,
      "loss": 0.1657,
      "step": 6352
    },
    {
      "epoch": 2.3460118168389954,
      "grad_norm": 0.2816498279571533,
      "learning_rate": 4.362606232294617e-05,
      "loss": 0.1632,
      "step": 6353
    },
    {
      "epoch": 2.346381093057607,
      "grad_norm": 0.2919892370700836,
      "learning_rate": 4.360142874738268e-05,
      "loss": 0.1675,
      "step": 6354
    },
    {
      "epoch": 2.3467503692762186,
      "grad_norm": 0.29279571771621704,
      "learning_rate": 4.357679517181919e-05,
      "loss": 0.1769,
      "step": 6355
    },
    {
      "epoch": 2.34711964549483,
      "grad_norm": 0.3148755431175232,
      "learning_rate": 4.35521615962557e-05,
      "loss": 0.1889,
      "step": 6356
    },
    {
      "epoch": 2.347488921713442,
      "grad_norm": 0.3025984764099121,
      "learning_rate": 4.3527528020692205e-05,
      "loss": 0.1729,
      "step": 6357
    },
    {
      "epoch": 2.3478581979320534,
      "grad_norm": 0.2500268816947937,
      "learning_rate": 4.350289444512871e-05,
      "loss": 0.1416,
      "step": 6358
    },
    {
      "epoch": 2.3482274741506646,
      "grad_norm": 0.3548707962036133,
      "learning_rate": 4.347826086956522e-05,
      "loss": 0.1898,
      "step": 6359
    },
    {
      "epoch": 2.348596750369276,
      "grad_norm": 0.36047059297561646,
      "learning_rate": 4.345362729400173e-05,
      "loss": 0.1478,
      "step": 6360
    },
    {
      "epoch": 2.348966026587888,
      "grad_norm": 0.3263508081436157,
      "learning_rate": 4.342899371843823e-05,
      "loss": 0.207,
      "step": 6361
    },
    {
      "epoch": 2.3493353028064994,
      "grad_norm": 0.24649988114833832,
      "learning_rate": 4.340436014287474e-05,
      "loss": 0.1609,
      "step": 6362
    },
    {
      "epoch": 2.3497045790251105,
      "grad_norm": 0.309300035238266,
      "learning_rate": 4.3379726567311245e-05,
      "loss": 0.1729,
      "step": 6363
    },
    {
      "epoch": 2.350073855243722,
      "grad_norm": 0.21511535346508026,
      "learning_rate": 4.335509299174775e-05,
      "loss": 0.1617,
      "step": 6364
    },
    {
      "epoch": 2.3504431314623337,
      "grad_norm": 0.3180568516254425,
      "learning_rate": 4.333045941618426e-05,
      "loss": 0.1806,
      "step": 6365
    },
    {
      "epoch": 2.3508124076809453,
      "grad_norm": 0.2766928970813751,
      "learning_rate": 4.330582584062077e-05,
      "loss": 0.1844,
      "step": 6366
    },
    {
      "epoch": 2.351181683899557,
      "grad_norm": 0.2802768051624298,
      "learning_rate": 4.3281192265057276e-05,
      "loss": 0.1818,
      "step": 6367
    },
    {
      "epoch": 2.3515509601181686,
      "grad_norm": 0.25994670391082764,
      "learning_rate": 4.3256558689493784e-05,
      "loss": 0.178,
      "step": 6368
    },
    {
      "epoch": 2.35192023633678,
      "grad_norm": 0.28026604652404785,
      "learning_rate": 4.3231925113930285e-05,
      "loss": 0.1942,
      "step": 6369
    },
    {
      "epoch": 2.3522895125553913,
      "grad_norm": 0.27653589844703674,
      "learning_rate": 4.320729153836679e-05,
      "loss": 0.187,
      "step": 6370
    },
    {
      "epoch": 2.352658788774003,
      "grad_norm": 0.24260860681533813,
      "learning_rate": 4.31826579628033e-05,
      "loss": 0.1558,
      "step": 6371
    },
    {
      "epoch": 2.3530280649926145,
      "grad_norm": 0.2591858208179474,
      "learning_rate": 4.315802438723981e-05,
      "loss": 0.1645,
      "step": 6372
    },
    {
      "epoch": 2.353397341211226,
      "grad_norm": 0.2710878252983093,
      "learning_rate": 4.313339081167632e-05,
      "loss": 0.1624,
      "step": 6373
    },
    {
      "epoch": 2.3537666174298373,
      "grad_norm": 0.29834455251693726,
      "learning_rate": 4.3108757236112825e-05,
      "loss": 0.1775,
      "step": 6374
    },
    {
      "epoch": 2.354135893648449,
      "grad_norm": 0.25172194838523865,
      "learning_rate": 4.308412366054933e-05,
      "loss": 0.1781,
      "step": 6375
    },
    {
      "epoch": 2.3545051698670605,
      "grad_norm": 0.23107177019119263,
      "learning_rate": 4.305949008498584e-05,
      "loss": 0.1469,
      "step": 6376
    },
    {
      "epoch": 2.354874446085672,
      "grad_norm": 0.2343122661113739,
      "learning_rate": 4.303485650942234e-05,
      "loss": 0.1449,
      "step": 6377
    },
    {
      "epoch": 2.3552437223042837,
      "grad_norm": 0.24231816828250885,
      "learning_rate": 4.301022293385885e-05,
      "loss": 0.1738,
      "step": 6378
    },
    {
      "epoch": 2.3556129985228953,
      "grad_norm": 0.29539164900779724,
      "learning_rate": 4.298558935829536e-05,
      "loss": 0.1668,
      "step": 6379
    },
    {
      "epoch": 2.3559822747415065,
      "grad_norm": 0.24808554351329803,
      "learning_rate": 4.2960955782731865e-05,
      "loss": 0.155,
      "step": 6380
    },
    {
      "epoch": 2.356351550960118,
      "grad_norm": 0.2660427689552307,
      "learning_rate": 4.293632220716837e-05,
      "loss": 0.1648,
      "step": 6381
    },
    {
      "epoch": 2.3567208271787297,
      "grad_norm": 0.280545175075531,
      "learning_rate": 4.291168863160488e-05,
      "loss": 0.1594,
      "step": 6382
    },
    {
      "epoch": 2.3570901033973413,
      "grad_norm": 0.31870830059051514,
      "learning_rate": 4.288705505604139e-05,
      "loss": 0.1636,
      "step": 6383
    },
    {
      "epoch": 2.357459379615953,
      "grad_norm": 0.24920117855072021,
      "learning_rate": 4.28624214804779e-05,
      "loss": 0.1608,
      "step": 6384
    },
    {
      "epoch": 2.357828655834564,
      "grad_norm": 0.24905510246753693,
      "learning_rate": 4.28377879049144e-05,
      "loss": 0.136,
      "step": 6385
    },
    {
      "epoch": 2.3581979320531756,
      "grad_norm": 0.2614610195159912,
      "learning_rate": 4.2813154329350906e-05,
      "loss": 0.1624,
      "step": 6386
    },
    {
      "epoch": 2.3585672082717872,
      "grad_norm": 0.24701111018657684,
      "learning_rate": 4.2788520753787413e-05,
      "loss": 0.1609,
      "step": 6387
    },
    {
      "epoch": 2.358936484490399,
      "grad_norm": 0.3295655846595764,
      "learning_rate": 4.276388717822392e-05,
      "loss": 0.1809,
      "step": 6388
    },
    {
      "epoch": 2.3593057607090104,
      "grad_norm": 0.3034295439720154,
      "learning_rate": 4.273925360266043e-05,
      "loss": 0.193,
      "step": 6389
    },
    {
      "epoch": 2.359675036927622,
      "grad_norm": 0.26090866327285767,
      "learning_rate": 4.271462002709694e-05,
      "loss": 0.1548,
      "step": 6390
    },
    {
      "epoch": 2.360044313146233,
      "grad_norm": 0.30005943775177,
      "learning_rate": 4.2689986451533445e-05,
      "loss": 0.1803,
      "step": 6391
    },
    {
      "epoch": 2.360413589364845,
      "grad_norm": 0.2783083915710449,
      "learning_rate": 4.266535287596995e-05,
      "loss": 0.1706,
      "step": 6392
    },
    {
      "epoch": 2.3607828655834564,
      "grad_norm": 0.303497850894928,
      "learning_rate": 4.2640719300406454e-05,
      "loss": 0.1955,
      "step": 6393
    },
    {
      "epoch": 2.361152141802068,
      "grad_norm": 0.22162476181983948,
      "learning_rate": 4.261608572484296e-05,
      "loss": 0.1579,
      "step": 6394
    },
    {
      "epoch": 2.3615214180206796,
      "grad_norm": 0.24626381695270538,
      "learning_rate": 4.259145214927947e-05,
      "loss": 0.1659,
      "step": 6395
    },
    {
      "epoch": 2.3618906942392908,
      "grad_norm": 0.22730812430381775,
      "learning_rate": 4.256681857371598e-05,
      "loss": 0.1514,
      "step": 6396
    },
    {
      "epoch": 2.3622599704579024,
      "grad_norm": 0.2827454209327698,
      "learning_rate": 4.2542184998152485e-05,
      "loss": 0.1878,
      "step": 6397
    },
    {
      "epoch": 2.362629246676514,
      "grad_norm": 0.29262617230415344,
      "learning_rate": 4.251755142258899e-05,
      "loss": 0.1694,
      "step": 6398
    },
    {
      "epoch": 2.3629985228951256,
      "grad_norm": 0.2770841717720032,
      "learning_rate": 4.24929178470255e-05,
      "loss": 0.1689,
      "step": 6399
    },
    {
      "epoch": 2.363367799113737,
      "grad_norm": 0.24018266797065735,
      "learning_rate": 4.246828427146201e-05,
      "loss": 0.1864,
      "step": 6400
    },
    {
      "epoch": 2.363367799113737,
      "eval_loss": 0.2547506093978882,
      "eval_runtime": 5.8503,
      "eval_samples_per_second": 8.547,
      "eval_steps_per_second": 1.197,
      "step": 6400
    },
    {
      "epoch": 2.363737075332349,
      "grad_norm": 0.2783015966415405,
      "learning_rate": 4.244365069589851e-05,
      "loss": 0.1675,
      "step": 6401
    },
    {
      "epoch": 2.36410635155096,
      "grad_norm": 0.28610220551490784,
      "learning_rate": 4.241901712033502e-05,
      "loss": 0.195,
      "step": 6402
    },
    {
      "epoch": 2.3644756277695715,
      "grad_norm": 0.24822600185871124,
      "learning_rate": 4.2394383544771526e-05,
      "loss": 0.1515,
      "step": 6403
    },
    {
      "epoch": 2.364844903988183,
      "grad_norm": 0.2633461654186249,
      "learning_rate": 4.2369749969208034e-05,
      "loss": 0.1539,
      "step": 6404
    },
    {
      "epoch": 2.3652141802067947,
      "grad_norm": 0.25599509477615356,
      "learning_rate": 4.234511639364454e-05,
      "loss": 0.1757,
      "step": 6405
    },
    {
      "epoch": 2.3655834564254064,
      "grad_norm": 0.34993016719818115,
      "learning_rate": 4.232048281808105e-05,
      "loss": 0.1865,
      "step": 6406
    },
    {
      "epoch": 2.3659527326440175,
      "grad_norm": 0.26258376240730286,
      "learning_rate": 4.229584924251756e-05,
      "loss": 0.1523,
      "step": 6407
    },
    {
      "epoch": 2.366322008862629,
      "grad_norm": 0.27636638283729553,
      "learning_rate": 4.227121566695406e-05,
      "loss": 0.1512,
      "step": 6408
    },
    {
      "epoch": 2.3666912850812407,
      "grad_norm": 0.3100382089614868,
      "learning_rate": 4.2246582091390566e-05,
      "loss": 0.187,
      "step": 6409
    },
    {
      "epoch": 2.3670605612998523,
      "grad_norm": 0.2492201328277588,
      "learning_rate": 4.2221948515827074e-05,
      "loss": 0.1529,
      "step": 6410
    },
    {
      "epoch": 2.367429837518464,
      "grad_norm": 0.22442567348480225,
      "learning_rate": 4.219731494026358e-05,
      "loss": 0.1607,
      "step": 6411
    },
    {
      "epoch": 2.3677991137370755,
      "grad_norm": 0.3340201675891876,
      "learning_rate": 4.217268136470009e-05,
      "loss": 0.1775,
      "step": 6412
    },
    {
      "epoch": 2.3681683899556867,
      "grad_norm": 0.2450280338525772,
      "learning_rate": 4.21480477891366e-05,
      "loss": 0.1723,
      "step": 6413
    },
    {
      "epoch": 2.3685376661742983,
      "grad_norm": 0.22480812668800354,
      "learning_rate": 4.2123414213573105e-05,
      "loss": 0.1559,
      "step": 6414
    },
    {
      "epoch": 2.36890694239291,
      "grad_norm": 0.24426712095737457,
      "learning_rate": 4.209878063800961e-05,
      "loss": 0.1476,
      "step": 6415
    },
    {
      "epoch": 2.3692762186115215,
      "grad_norm": 0.2631894052028656,
      "learning_rate": 4.2074147062446114e-05,
      "loss": 0.1911,
      "step": 6416
    },
    {
      "epoch": 2.369645494830133,
      "grad_norm": 0.2455267757177353,
      "learning_rate": 4.204951348688262e-05,
      "loss": 0.1828,
      "step": 6417
    },
    {
      "epoch": 2.3700147710487443,
      "grad_norm": 0.38169267773628235,
      "learning_rate": 4.202487991131913e-05,
      "loss": 0.1904,
      "step": 6418
    },
    {
      "epoch": 2.370384047267356,
      "grad_norm": 0.3039427101612091,
      "learning_rate": 4.200024633575564e-05,
      "loss": 0.1731,
      "step": 6419
    },
    {
      "epoch": 2.3707533234859675,
      "grad_norm": 0.27263590693473816,
      "learning_rate": 4.1975612760192146e-05,
      "loss": 0.1798,
      "step": 6420
    },
    {
      "epoch": 2.371122599704579,
      "grad_norm": 0.2618173360824585,
      "learning_rate": 4.1950979184628654e-05,
      "loss": 0.1492,
      "step": 6421
    },
    {
      "epoch": 2.3714918759231907,
      "grad_norm": 0.2843448221683502,
      "learning_rate": 4.192634560906516e-05,
      "loss": 0.1614,
      "step": 6422
    },
    {
      "epoch": 2.3718611521418023,
      "grad_norm": 0.2256832867860794,
      "learning_rate": 4.190171203350167e-05,
      "loss": 0.1657,
      "step": 6423
    },
    {
      "epoch": 2.3722304283604134,
      "grad_norm": 0.29430991411209106,
      "learning_rate": 4.187707845793817e-05,
      "loss": 0.1744,
      "step": 6424
    },
    {
      "epoch": 2.372599704579025,
      "grad_norm": 0.2796902358531952,
      "learning_rate": 4.185244488237468e-05,
      "loss": 0.1913,
      "step": 6425
    },
    {
      "epoch": 2.3729689807976366,
      "grad_norm": 0.2929346561431885,
      "learning_rate": 4.1827811306811186e-05,
      "loss": 0.1928,
      "step": 6426
    },
    {
      "epoch": 2.3733382570162482,
      "grad_norm": 0.46698009967803955,
      "learning_rate": 4.1803177731247694e-05,
      "loss": 0.1973,
      "step": 6427
    },
    {
      "epoch": 2.37370753323486,
      "grad_norm": 0.2522735297679901,
      "learning_rate": 4.17785441556842e-05,
      "loss": 0.1671,
      "step": 6428
    },
    {
      "epoch": 2.374076809453471,
      "grad_norm": 0.3110278248786926,
      "learning_rate": 4.175391058012071e-05,
      "loss": 0.1913,
      "step": 6429
    },
    {
      "epoch": 2.3744460856720826,
      "grad_norm": 0.3266526758670807,
      "learning_rate": 4.172927700455722e-05,
      "loss": 0.1873,
      "step": 6430
    },
    {
      "epoch": 2.374815361890694,
      "grad_norm": 0.2453799694776535,
      "learning_rate": 4.1704643428993726e-05,
      "loss": 0.1551,
      "step": 6431
    },
    {
      "epoch": 2.375184638109306,
      "grad_norm": 0.24761134386062622,
      "learning_rate": 4.168000985343023e-05,
      "loss": 0.159,
      "step": 6432
    },
    {
      "epoch": 2.3755539143279174,
      "grad_norm": 0.26183828711509705,
      "learning_rate": 4.1655376277866734e-05,
      "loss": 0.155,
      "step": 6433
    },
    {
      "epoch": 2.375923190546529,
      "grad_norm": 0.24739377200603485,
      "learning_rate": 4.163074270230324e-05,
      "loss": 0.1538,
      "step": 6434
    },
    {
      "epoch": 2.37629246676514,
      "grad_norm": 0.2409384697675705,
      "learning_rate": 4.160610912673975e-05,
      "loss": 0.1684,
      "step": 6435
    },
    {
      "epoch": 2.3766617429837518,
      "grad_norm": 0.28293392062187195,
      "learning_rate": 4.158147555117626e-05,
      "loss": 0.1654,
      "step": 6436
    },
    {
      "epoch": 2.3770310192023634,
      "grad_norm": 0.31228867173194885,
      "learning_rate": 4.1556841975612766e-05,
      "loss": 0.1648,
      "step": 6437
    },
    {
      "epoch": 2.377400295420975,
      "grad_norm": 0.2207242101430893,
      "learning_rate": 4.1532208400049274e-05,
      "loss": 0.155,
      "step": 6438
    },
    {
      "epoch": 2.3777695716395866,
      "grad_norm": 0.256783664226532,
      "learning_rate": 4.150757482448578e-05,
      "loss": 0.1706,
      "step": 6439
    },
    {
      "epoch": 2.3781388478581977,
      "grad_norm": 0.238917276263237,
      "learning_rate": 4.148294124892228e-05,
      "loss": 0.1494,
      "step": 6440
    },
    {
      "epoch": 2.3785081240768093,
      "grad_norm": 0.24925558269023895,
      "learning_rate": 4.145830767335879e-05,
      "loss": 0.1684,
      "step": 6441
    },
    {
      "epoch": 2.378877400295421,
      "grad_norm": 0.2319122850894928,
      "learning_rate": 4.14336740977953e-05,
      "loss": 0.1691,
      "step": 6442
    },
    {
      "epoch": 2.3792466765140325,
      "grad_norm": 0.32297733426094055,
      "learning_rate": 4.14090405222318e-05,
      "loss": 0.1691,
      "step": 6443
    },
    {
      "epoch": 2.379615952732644,
      "grad_norm": 0.3200797140598297,
      "learning_rate": 4.138440694666831e-05,
      "loss": 0.1854,
      "step": 6444
    },
    {
      "epoch": 2.3799852289512557,
      "grad_norm": 0.31530579924583435,
      "learning_rate": 4.1359773371104815e-05,
      "loss": 0.19,
      "step": 6445
    },
    {
      "epoch": 2.380354505169867,
      "grad_norm": 0.2863091826438904,
      "learning_rate": 4.133513979554132e-05,
      "loss": 0.1689,
      "step": 6446
    },
    {
      "epoch": 2.3807237813884785,
      "grad_norm": 0.2543277442455292,
      "learning_rate": 4.131050621997783e-05,
      "loss": 0.1662,
      "step": 6447
    },
    {
      "epoch": 2.38109305760709,
      "grad_norm": 0.2751203179359436,
      "learning_rate": 4.128587264441434e-05,
      "loss": 0.1662,
      "step": 6448
    },
    {
      "epoch": 2.3814623338257017,
      "grad_norm": 0.27884477376937866,
      "learning_rate": 4.126123906885084e-05,
      "loss": 0.1769,
      "step": 6449
    },
    {
      "epoch": 2.3818316100443133,
      "grad_norm": 0.29122814536094666,
      "learning_rate": 4.123660549328735e-05,
      "loss": 0.1839,
      "step": 6450
    },
    {
      "epoch": 2.3818316100443133,
      "eval_loss": 0.25416862964630127,
      "eval_runtime": 5.849,
      "eval_samples_per_second": 8.549,
      "eval_steps_per_second": 1.197,
      "step": 6450
    },
    {
      "epoch": 2.3822008862629245,
      "grad_norm": 0.42802420258522034,
      "learning_rate": 4.1211971917723856e-05,
      "loss": 0.1906,
      "step": 6451
    },
    {
      "epoch": 2.382570162481536,
      "grad_norm": 0.29035428166389465,
      "learning_rate": 4.1187338342160364e-05,
      "loss": 0.1753,
      "step": 6452
    },
    {
      "epoch": 2.3829394387001477,
      "grad_norm": 0.22821110486984253,
      "learning_rate": 4.116270476659687e-05,
      "loss": 0.1688,
      "step": 6453
    },
    {
      "epoch": 2.3833087149187593,
      "grad_norm": 0.2822718918323517,
      "learning_rate": 4.113807119103338e-05,
      "loss": 0.1432,
      "step": 6454
    },
    {
      "epoch": 2.383677991137371,
      "grad_norm": 0.2678989768028259,
      "learning_rate": 4.111343761546989e-05,
      "loss": 0.1625,
      "step": 6455
    },
    {
      "epoch": 2.3840472673559825,
      "grad_norm": 0.29239994287490845,
      "learning_rate": 4.1088804039906395e-05,
      "loss": 0.1765,
      "step": 6456
    },
    {
      "epoch": 2.3844165435745936,
      "grad_norm": 0.23325029015541077,
      "learning_rate": 4.1064170464342896e-05,
      "loss": 0.1488,
      "step": 6457
    },
    {
      "epoch": 2.3847858197932053,
      "grad_norm": 0.27885130047798157,
      "learning_rate": 4.1039536888779404e-05,
      "loss": 0.1868,
      "step": 6458
    },
    {
      "epoch": 2.385155096011817,
      "grad_norm": 0.21699808537960052,
      "learning_rate": 4.101490331321591e-05,
      "loss": 0.1384,
      "step": 6459
    },
    {
      "epoch": 2.3855243722304285,
      "grad_norm": 0.2940004765987396,
      "learning_rate": 4.099026973765242e-05,
      "loss": 0.1505,
      "step": 6460
    },
    {
      "epoch": 2.38589364844904,
      "grad_norm": 0.2605478763580322,
      "learning_rate": 4.096563616208893e-05,
      "loss": 0.1713,
      "step": 6461
    },
    {
      "epoch": 2.386262924667651,
      "grad_norm": 0.23694145679473877,
      "learning_rate": 4.0941002586525435e-05,
      "loss": 0.1476,
      "step": 6462
    },
    {
      "epoch": 2.386632200886263,
      "grad_norm": 0.3237950801849365,
      "learning_rate": 4.091636901096194e-05,
      "loss": 0.1542,
      "step": 6463
    },
    {
      "epoch": 2.3870014771048744,
      "grad_norm": 0.2576119601726532,
      "learning_rate": 4.089173543539845e-05,
      "loss": 0.1495,
      "step": 6464
    },
    {
      "epoch": 2.387370753323486,
      "grad_norm": 0.3325550854206085,
      "learning_rate": 4.086710185983495e-05,
      "loss": 0.1724,
      "step": 6465
    },
    {
      "epoch": 2.3877400295420976,
      "grad_norm": 0.3270372748374939,
      "learning_rate": 4.084246828427146e-05,
      "loss": 0.1901,
      "step": 6466
    },
    {
      "epoch": 2.3881093057607092,
      "grad_norm": 0.2555522918701172,
      "learning_rate": 4.081783470870797e-05,
      "loss": 0.1482,
      "step": 6467
    },
    {
      "epoch": 2.3884785819793204,
      "grad_norm": 0.3293311893939972,
      "learning_rate": 4.0793201133144476e-05,
      "loss": 0.191,
      "step": 6468
    },
    {
      "epoch": 2.388847858197932,
      "grad_norm": 0.3073927164077759,
      "learning_rate": 4.0768567557580984e-05,
      "loss": 0.1628,
      "step": 6469
    },
    {
      "epoch": 2.3892171344165436,
      "grad_norm": 0.24122124910354614,
      "learning_rate": 4.074393398201749e-05,
      "loss": 0.1705,
      "step": 6470
    },
    {
      "epoch": 2.389586410635155,
      "grad_norm": 0.318040132522583,
      "learning_rate": 4.0719300406454e-05,
      "loss": 0.2037,
      "step": 6471
    },
    {
      "epoch": 2.389955686853767,
      "grad_norm": 0.2927151322364807,
      "learning_rate": 4.069466683089051e-05,
      "loss": 0.1584,
      "step": 6472
    },
    {
      "epoch": 2.390324963072378,
      "grad_norm": 0.28697019815444946,
      "learning_rate": 4.067003325532701e-05,
      "loss": 0.1612,
      "step": 6473
    },
    {
      "epoch": 2.3906942392909896,
      "grad_norm": 0.2633149027824402,
      "learning_rate": 4.0645399679763516e-05,
      "loss": 0.1724,
      "step": 6474
    },
    {
      "epoch": 2.391063515509601,
      "grad_norm": 0.26802918314933777,
      "learning_rate": 4.0620766104200024e-05,
      "loss": 0.1721,
      "step": 6475
    },
    {
      "epoch": 2.3914327917282128,
      "grad_norm": 0.31744685769081116,
      "learning_rate": 4.059613252863653e-05,
      "loss": 0.1588,
      "step": 6476
    },
    {
      "epoch": 2.3918020679468244,
      "grad_norm": 0.28282245993614197,
      "learning_rate": 4.057149895307304e-05,
      "loss": 0.1743,
      "step": 6477
    },
    {
      "epoch": 2.392171344165436,
      "grad_norm": 0.280818372964859,
      "learning_rate": 4.054686537750955e-05,
      "loss": 0.1662,
      "step": 6478
    },
    {
      "epoch": 2.392540620384047,
      "grad_norm": 0.26216059923171997,
      "learning_rate": 4.0522231801946056e-05,
      "loss": 0.1816,
      "step": 6479
    },
    {
      "epoch": 2.3929098966026587,
      "grad_norm": 0.39850226044654846,
      "learning_rate": 4.0497598226382563e-05,
      "loss": 0.1928,
      "step": 6480
    },
    {
      "epoch": 2.3932791728212703,
      "grad_norm": 0.29623129963874817,
      "learning_rate": 4.0472964650819064e-05,
      "loss": 0.1813,
      "step": 6481
    },
    {
      "epoch": 2.393648449039882,
      "grad_norm": 0.41195112466812134,
      "learning_rate": 4.044833107525557e-05,
      "loss": 0.1932,
      "step": 6482
    },
    {
      "epoch": 2.3940177252584935,
      "grad_norm": 0.32941770553588867,
      "learning_rate": 4.042369749969208e-05,
      "loss": 0.1854,
      "step": 6483
    },
    {
      "epoch": 2.3943870014771047,
      "grad_norm": 0.26806557178497314,
      "learning_rate": 4.039906392412859e-05,
      "loss": 0.1528,
      "step": 6484
    },
    {
      "epoch": 2.3947562776957163,
      "grad_norm": 0.2809225916862488,
      "learning_rate": 4.0374430348565096e-05,
      "loss": 0.1575,
      "step": 6485
    },
    {
      "epoch": 2.395125553914328,
      "grad_norm": 0.30107736587524414,
      "learning_rate": 4.0349796773001604e-05,
      "loss": 0.1745,
      "step": 6486
    },
    {
      "epoch": 2.3954948301329395,
      "grad_norm": 0.2642914950847626,
      "learning_rate": 4.032516319743811e-05,
      "loss": 0.1542,
      "step": 6487
    },
    {
      "epoch": 2.395864106351551,
      "grad_norm": 0.26644206047058105,
      "learning_rate": 4.030052962187462e-05,
      "loss": 0.1712,
      "step": 6488
    },
    {
      "epoch": 2.3962333825701627,
      "grad_norm": 0.2601780295372009,
      "learning_rate": 4.027589604631112e-05,
      "loss": 0.1609,
      "step": 6489
    },
    {
      "epoch": 2.396602658788774,
      "grad_norm": 0.2623216509819031,
      "learning_rate": 4.025126247074763e-05,
      "loss": 0.1742,
      "step": 6490
    },
    {
      "epoch": 2.3969719350073855,
      "grad_norm": 0.23525527119636536,
      "learning_rate": 4.0226628895184136e-05,
      "loss": 0.1732,
      "step": 6491
    },
    {
      "epoch": 2.397341211225997,
      "grad_norm": 0.2936418354511261,
      "learning_rate": 4.0201995319620644e-05,
      "loss": 0.1575,
      "step": 6492
    },
    {
      "epoch": 2.3977104874446087,
      "grad_norm": 0.21576356887817383,
      "learning_rate": 4.017736174405715e-05,
      "loss": 0.1478,
      "step": 6493
    },
    {
      "epoch": 2.39807976366322,
      "grad_norm": 0.283373087644577,
      "learning_rate": 4.015272816849366e-05,
      "loss": 0.1716,
      "step": 6494
    },
    {
      "epoch": 2.3984490398818314,
      "grad_norm": 0.24093297123908997,
      "learning_rate": 4.012809459293017e-05,
      "loss": 0.1582,
      "step": 6495
    },
    {
      "epoch": 2.398818316100443,
      "grad_norm": 0.3448116183280945,
      "learning_rate": 4.0103461017366676e-05,
      "loss": 0.1793,
      "step": 6496
    },
    {
      "epoch": 2.3991875923190547,
      "grad_norm": 0.27197304368019104,
      "learning_rate": 4.007882744180318e-05,
      "loss": 0.1705,
      "step": 6497
    },
    {
      "epoch": 2.3995568685376663,
      "grad_norm": 0.2958301305770874,
      "learning_rate": 4.0054193866239685e-05,
      "loss": 0.1585,
      "step": 6498
    },
    {
      "epoch": 2.399926144756278,
      "grad_norm": 0.23141135275363922,
      "learning_rate": 4.002956029067619e-05,
      "loss": 0.1442,
      "step": 6499
    },
    {
      "epoch": 2.4002954209748895,
      "grad_norm": 0.27087700366973877,
      "learning_rate": 4.00049267151127e-05,
      "loss": 0.1608,
      "step": 6500
    },
    {
      "epoch": 2.4002954209748895,
      "eval_loss": 0.254189670085907,
      "eval_runtime": 5.8593,
      "eval_samples_per_second": 8.533,
      "eval_steps_per_second": 1.195,
      "step": 6500
    },
    {
      "epoch": 2.4006646971935006,
      "grad_norm": 0.3575212061405182,
      "learning_rate": 3.998029313954921e-05,
      "loss": 0.1717,
      "step": 6501
    },
    {
      "epoch": 2.401033973412112,
      "grad_norm": 0.2458181381225586,
      "learning_rate": 3.9955659563985716e-05,
      "loss": 0.1431,
      "step": 6502
    },
    {
      "epoch": 2.401403249630724,
      "grad_norm": 0.2927877902984619,
      "learning_rate": 3.9931025988422224e-05,
      "loss": 0.1897,
      "step": 6503
    },
    {
      "epoch": 2.4017725258493354,
      "grad_norm": 0.2803095877170563,
      "learning_rate": 3.990639241285873e-05,
      "loss": 0.1416,
      "step": 6504
    },
    {
      "epoch": 2.4021418020679466,
      "grad_norm": 0.28265419602394104,
      "learning_rate": 3.988175883729523e-05,
      "loss": 0.1776,
      "step": 6505
    },
    {
      "epoch": 2.402511078286558,
      "grad_norm": 0.2402728646993637,
      "learning_rate": 3.985712526173174e-05,
      "loss": 0.1624,
      "step": 6506
    },
    {
      "epoch": 2.40288035450517,
      "grad_norm": 0.26712340116500854,
      "learning_rate": 3.983249168616825e-05,
      "loss": 0.1635,
      "step": 6507
    },
    {
      "epoch": 2.4032496307237814,
      "grad_norm": 0.2670974135398865,
      "learning_rate": 3.9807858110604756e-05,
      "loss": 0.1851,
      "step": 6508
    },
    {
      "epoch": 2.403618906942393,
      "grad_norm": 0.27897876501083374,
      "learning_rate": 3.9783224535041264e-05,
      "loss": 0.1751,
      "step": 6509
    },
    {
      "epoch": 2.4039881831610046,
      "grad_norm": 0.2822982966899872,
      "learning_rate": 3.975859095947777e-05,
      "loss": 0.1765,
      "step": 6510
    },
    {
      "epoch": 2.404357459379616,
      "grad_norm": 0.23303090035915375,
      "learning_rate": 3.973395738391428e-05,
      "loss": 0.1455,
      "step": 6511
    },
    {
      "epoch": 2.4047267355982274,
      "grad_norm": 0.3002856373786926,
      "learning_rate": 3.970932380835079e-05,
      "loss": 0.1817,
      "step": 6512
    },
    {
      "epoch": 2.405096011816839,
      "grad_norm": 0.29189005494117737,
      "learning_rate": 3.968469023278729e-05,
      "loss": 0.1643,
      "step": 6513
    },
    {
      "epoch": 2.4054652880354506,
      "grad_norm": 0.240583598613739,
      "learning_rate": 3.96600566572238e-05,
      "loss": 0.1375,
      "step": 6514
    },
    {
      "epoch": 2.405834564254062,
      "grad_norm": 0.30848589539527893,
      "learning_rate": 3.9635423081660305e-05,
      "loss": 0.181,
      "step": 6515
    },
    {
      "epoch": 2.4062038404726733,
      "grad_norm": 0.26245778799057007,
      "learning_rate": 3.961078950609681e-05,
      "loss": 0.1549,
      "step": 6516
    },
    {
      "epoch": 2.406573116691285,
      "grad_norm": 0.27827879786491394,
      "learning_rate": 3.958615593053332e-05,
      "loss": 0.1605,
      "step": 6517
    },
    {
      "epoch": 2.4069423929098965,
      "grad_norm": 0.2480623573064804,
      "learning_rate": 3.956152235496983e-05,
      "loss": 0.1574,
      "step": 6518
    },
    {
      "epoch": 2.407311669128508,
      "grad_norm": 0.22942690551280975,
      "learning_rate": 3.9536888779406336e-05,
      "loss": 0.1683,
      "step": 6519
    },
    {
      "epoch": 2.4076809453471197,
      "grad_norm": 0.24263231456279755,
      "learning_rate": 3.951225520384284e-05,
      "loss": 0.1525,
      "step": 6520
    },
    {
      "epoch": 2.4080502215657313,
      "grad_norm": 0.2355821132659912,
      "learning_rate": 3.9487621628279345e-05,
      "loss": 0.1488,
      "step": 6521
    },
    {
      "epoch": 2.4084194977843425,
      "grad_norm": 0.3792753219604492,
      "learning_rate": 3.946298805271585e-05,
      "loss": 0.1787,
      "step": 6522
    },
    {
      "epoch": 2.408788774002954,
      "grad_norm": 0.2381313145160675,
      "learning_rate": 3.943835447715236e-05,
      "loss": 0.1646,
      "step": 6523
    },
    {
      "epoch": 2.4091580502215657,
      "grad_norm": 0.34551018476486206,
      "learning_rate": 3.941372090158887e-05,
      "loss": 0.168,
      "step": 6524
    },
    {
      "epoch": 2.4095273264401773,
      "grad_norm": 0.34995561838150024,
      "learning_rate": 3.9389087326025377e-05,
      "loss": 0.2189,
      "step": 6525
    },
    {
      "epoch": 2.409896602658789,
      "grad_norm": 0.3129412531852722,
      "learning_rate": 3.9364453750461884e-05,
      "loss": 0.1812,
      "step": 6526
    },
    {
      "epoch": 2.4102658788774,
      "grad_norm": 0.2410811483860016,
      "learning_rate": 3.933982017489839e-05,
      "loss": 0.1596,
      "step": 6527
    },
    {
      "epoch": 2.4106351550960117,
      "grad_norm": 0.3152921497821808,
      "learning_rate": 3.9315186599334893e-05,
      "loss": 0.2059,
      "step": 6528
    },
    {
      "epoch": 2.4110044313146233,
      "grad_norm": 0.2665098309516907,
      "learning_rate": 3.92905530237714e-05,
      "loss": 0.1669,
      "step": 6529
    },
    {
      "epoch": 2.411373707533235,
      "grad_norm": 0.2058529108762741,
      "learning_rate": 3.926591944820791e-05,
      "loss": 0.1531,
      "step": 6530
    },
    {
      "epoch": 2.4117429837518465,
      "grad_norm": 0.2851026654243469,
      "learning_rate": 3.924128587264442e-05,
      "loss": 0.1792,
      "step": 6531
    },
    {
      "epoch": 2.412112259970458,
      "grad_norm": 0.39070722460746765,
      "learning_rate": 3.9216652297080925e-05,
      "loss": 0.1588,
      "step": 6532
    },
    {
      "epoch": 2.4124815361890692,
      "grad_norm": 0.33281734585762024,
      "learning_rate": 3.919201872151743e-05,
      "loss": 0.1958,
      "step": 6533
    },
    {
      "epoch": 2.412850812407681,
      "grad_norm": 0.3490018844604492,
      "learning_rate": 3.916738514595394e-05,
      "loss": 0.1819,
      "step": 6534
    },
    {
      "epoch": 2.4132200886262924,
      "grad_norm": 0.2670537531375885,
      "learning_rate": 3.914275157039045e-05,
      "loss": 0.1873,
      "step": 6535
    },
    {
      "epoch": 2.413589364844904,
      "grad_norm": 0.2629680633544922,
      "learning_rate": 3.911811799482695e-05,
      "loss": 0.1654,
      "step": 6536
    },
    {
      "epoch": 2.4139586410635157,
      "grad_norm": 0.2617724537849426,
      "learning_rate": 3.909348441926346e-05,
      "loss": 0.1445,
      "step": 6537
    },
    {
      "epoch": 2.414327917282127,
      "grad_norm": 0.31672918796539307,
      "learning_rate": 3.9068850843699965e-05,
      "loss": 0.1756,
      "step": 6538
    },
    {
      "epoch": 2.4146971935007384,
      "grad_norm": 0.32156234979629517,
      "learning_rate": 3.904421726813647e-05,
      "loss": 0.1832,
      "step": 6539
    },
    {
      "epoch": 2.41506646971935,
      "grad_norm": 0.2983616590499878,
      "learning_rate": 3.901958369257298e-05,
      "loss": 0.1646,
      "step": 6540
    },
    {
      "epoch": 2.4154357459379616,
      "grad_norm": 0.24603486061096191,
      "learning_rate": 3.899495011700949e-05,
      "loss": 0.155,
      "step": 6541
    },
    {
      "epoch": 2.4158050221565732,
      "grad_norm": 0.25211790204048157,
      "learning_rate": 3.8970316541446e-05,
      "loss": 0.1562,
      "step": 6542
    },
    {
      "epoch": 2.416174298375185,
      "grad_norm": 0.25376376509666443,
      "learning_rate": 3.8945682965882505e-05,
      "loss": 0.1617,
      "step": 6543
    },
    {
      "epoch": 2.416543574593796,
      "grad_norm": 0.26741838455200195,
      "learning_rate": 3.8921049390319006e-05,
      "loss": 0.1598,
      "step": 6544
    },
    {
      "epoch": 2.4169128508124076,
      "grad_norm": 0.2889264225959778,
      "learning_rate": 3.8896415814755514e-05,
      "loss": 0.176,
      "step": 6545
    },
    {
      "epoch": 2.417282127031019,
      "grad_norm": 0.2853246033191681,
      "learning_rate": 3.887178223919202e-05,
      "loss": 0.1771,
      "step": 6546
    },
    {
      "epoch": 2.417651403249631,
      "grad_norm": 0.2737523913383484,
      "learning_rate": 3.884714866362853e-05,
      "loss": 0.1648,
      "step": 6547
    },
    {
      "epoch": 2.4180206794682424,
      "grad_norm": 0.28757739067077637,
      "learning_rate": 3.882251508806504e-05,
      "loss": 0.1731,
      "step": 6548
    },
    {
      "epoch": 2.4183899556868536,
      "grad_norm": 0.24770046770572662,
      "learning_rate": 3.8797881512501545e-05,
      "loss": 0.1468,
      "step": 6549
    },
    {
      "epoch": 2.418759231905465,
      "grad_norm": 0.285557359457016,
      "learning_rate": 3.877324793693805e-05,
      "loss": 0.1983,
      "step": 6550
    },
    {
      "epoch": 2.418759231905465,
      "eval_loss": 0.25278323888778687,
      "eval_runtime": 5.8555,
      "eval_samples_per_second": 8.539,
      "eval_steps_per_second": 1.195,
      "step": 6550
    },
    {
      "epoch": 2.4191285081240768,
      "grad_norm": 0.24747471511363983,
      "learning_rate": 3.874861436137456e-05,
      "loss": 0.174,
      "step": 6551
    },
    {
      "epoch": 2.4194977843426884,
      "grad_norm": 0.2874555289745331,
      "learning_rate": 3.872398078581106e-05,
      "loss": 0.1753,
      "step": 6552
    },
    {
      "epoch": 2.4198670605613,
      "grad_norm": 0.25234147906303406,
      "learning_rate": 3.869934721024757e-05,
      "loss": 0.1617,
      "step": 6553
    },
    {
      "epoch": 2.4202363367799116,
      "grad_norm": 0.29611289501190186,
      "learning_rate": 3.867471363468408e-05,
      "loss": 0.168,
      "step": 6554
    },
    {
      "epoch": 2.4206056129985227,
      "grad_norm": 0.3307589888572693,
      "learning_rate": 3.8650080059120585e-05,
      "loss": 0.1818,
      "step": 6555
    },
    {
      "epoch": 2.4209748892171343,
      "grad_norm": 0.2812064290046692,
      "learning_rate": 3.862544648355709e-05,
      "loss": 0.1712,
      "step": 6556
    },
    {
      "epoch": 2.421344165435746,
      "grad_norm": 0.26278820633888245,
      "learning_rate": 3.86008129079936e-05,
      "loss": 0.166,
      "step": 6557
    },
    {
      "epoch": 2.4217134416543575,
      "grad_norm": 0.2416645884513855,
      "learning_rate": 3.857617933243011e-05,
      "loss": 0.1666,
      "step": 6558
    },
    {
      "epoch": 2.422082717872969,
      "grad_norm": 0.2979586720466614,
      "learning_rate": 3.855154575686661e-05,
      "loss": 0.1706,
      "step": 6559
    },
    {
      "epoch": 2.4224519940915803,
      "grad_norm": 0.22586217522621155,
      "learning_rate": 3.852691218130312e-05,
      "loss": 0.1656,
      "step": 6560
    },
    {
      "epoch": 2.422821270310192,
      "grad_norm": 0.26606011390686035,
      "learning_rate": 3.850227860573962e-05,
      "loss": 0.1705,
      "step": 6561
    },
    {
      "epoch": 2.4231905465288035,
      "grad_norm": 0.2324720025062561,
      "learning_rate": 3.847764503017613e-05,
      "loss": 0.1503,
      "step": 6562
    },
    {
      "epoch": 2.423559822747415,
      "grad_norm": 0.28596732020378113,
      "learning_rate": 3.8453011454612635e-05,
      "loss": 0.139,
      "step": 6563
    },
    {
      "epoch": 2.4239290989660267,
      "grad_norm": 0.2692070007324219,
      "learning_rate": 3.842837787904914e-05,
      "loss": 0.1846,
      "step": 6564
    },
    {
      "epoch": 2.4242983751846383,
      "grad_norm": 0.27173539996147156,
      "learning_rate": 3.840374430348565e-05,
      "loss": 0.1612,
      "step": 6565
    },
    {
      "epoch": 2.4246676514032495,
      "grad_norm": 0.2125542312860489,
      "learning_rate": 3.837911072792216e-05,
      "loss": 0.1514,
      "step": 6566
    },
    {
      "epoch": 2.425036927621861,
      "grad_norm": 0.27082082629203796,
      "learning_rate": 3.8354477152358666e-05,
      "loss": 0.1656,
      "step": 6567
    },
    {
      "epoch": 2.4254062038404727,
      "grad_norm": 0.2991025745868683,
      "learning_rate": 3.8329843576795174e-05,
      "loss": 0.1816,
      "step": 6568
    },
    {
      "epoch": 2.4257754800590843,
      "grad_norm": 0.29907068610191345,
      "learning_rate": 3.8305210001231675e-05,
      "loss": 0.1679,
      "step": 6569
    },
    {
      "epoch": 2.426144756277696,
      "grad_norm": 0.25698262453079224,
      "learning_rate": 3.828057642566818e-05,
      "loss": 0.1555,
      "step": 6570
    },
    {
      "epoch": 2.426514032496307,
      "grad_norm": 0.36471596360206604,
      "learning_rate": 3.825594285010469e-05,
      "loss": 0.2193,
      "step": 6571
    },
    {
      "epoch": 2.4268833087149186,
      "grad_norm": 0.2976161241531372,
      "learning_rate": 3.82313092745412e-05,
      "loss": 0.147,
      "step": 6572
    },
    {
      "epoch": 2.4272525849335302,
      "grad_norm": 0.28336474299430847,
      "learning_rate": 3.8206675698977707e-05,
      "loss": 0.1466,
      "step": 6573
    },
    {
      "epoch": 2.427621861152142,
      "grad_norm": 0.28433650732040405,
      "learning_rate": 3.8182042123414214e-05,
      "loss": 0.1735,
      "step": 6574
    },
    {
      "epoch": 2.4279911373707534,
      "grad_norm": 0.2402041107416153,
      "learning_rate": 3.815740854785072e-05,
      "loss": 0.1596,
      "step": 6575
    },
    {
      "epoch": 2.428360413589365,
      "grad_norm": 0.2426626980304718,
      "learning_rate": 3.813277497228723e-05,
      "loss": 0.1512,
      "step": 6576
    },
    {
      "epoch": 2.428729689807976,
      "grad_norm": 0.26151448488235474,
      "learning_rate": 3.810814139672373e-05,
      "loss": 0.1678,
      "step": 6577
    },
    {
      "epoch": 2.429098966026588,
      "grad_norm": 0.2676202356815338,
      "learning_rate": 3.808350782116024e-05,
      "loss": 0.1432,
      "step": 6578
    },
    {
      "epoch": 2.4294682422451994,
      "grad_norm": 0.22569593787193298,
      "learning_rate": 3.805887424559675e-05,
      "loss": 0.1599,
      "step": 6579
    },
    {
      "epoch": 2.429837518463811,
      "grad_norm": 0.27633586525917053,
      "learning_rate": 3.8034240670033255e-05,
      "loss": 0.169,
      "step": 6580
    },
    {
      "epoch": 2.4302067946824226,
      "grad_norm": 0.2126288115978241,
      "learning_rate": 3.800960709446976e-05,
      "loss": 0.1438,
      "step": 6581
    },
    {
      "epoch": 2.430576070901034,
      "grad_norm": 0.30395951867103577,
      "learning_rate": 3.798497351890627e-05,
      "loss": 0.182,
      "step": 6582
    },
    {
      "epoch": 2.4309453471196454,
      "grad_norm": 0.3230239748954773,
      "learning_rate": 3.796033994334278e-05,
      "loss": 0.1505,
      "step": 6583
    },
    {
      "epoch": 2.431314623338257,
      "grad_norm": 0.46397629380226135,
      "learning_rate": 3.7935706367779286e-05,
      "loss": 0.2115,
      "step": 6584
    },
    {
      "epoch": 2.4316838995568686,
      "grad_norm": 0.260724276304245,
      "learning_rate": 3.791107279221579e-05,
      "loss": 0.1585,
      "step": 6585
    },
    {
      "epoch": 2.43205317577548,
      "grad_norm": 0.2859072685241699,
      "learning_rate": 3.7886439216652295e-05,
      "loss": 0.165,
      "step": 6586
    },
    {
      "epoch": 2.432422451994092,
      "grad_norm": 0.24744963645935059,
      "learning_rate": 3.78618056410888e-05,
      "loss": 0.1487,
      "step": 6587
    },
    {
      "epoch": 2.432791728212703,
      "grad_norm": 0.22620677947998047,
      "learning_rate": 3.783717206552531e-05,
      "loss": 0.1647,
      "step": 6588
    },
    {
      "epoch": 2.4331610044313146,
      "grad_norm": 0.2779679000377655,
      "learning_rate": 3.781253848996182e-05,
      "loss": 0.2016,
      "step": 6589
    },
    {
      "epoch": 2.433530280649926,
      "grad_norm": 0.33739492297172546,
      "learning_rate": 3.778790491439833e-05,
      "loss": 0.1703,
      "step": 6590
    },
    {
      "epoch": 2.4338995568685378,
      "grad_norm": 0.2833116054534912,
      "learning_rate": 3.7763271338834835e-05,
      "loss": 0.1663,
      "step": 6591
    },
    {
      "epoch": 2.4342688330871494,
      "grad_norm": 0.26829156279563904,
      "learning_rate": 3.773863776327134e-05,
      "loss": 0.1442,
      "step": 6592
    },
    {
      "epoch": 2.4346381093057605,
      "grad_norm": 0.27546414732933044,
      "learning_rate": 3.7714004187707844e-05,
      "loss": 0.1851,
      "step": 6593
    },
    {
      "epoch": 2.435007385524372,
      "grad_norm": 0.22478356957435608,
      "learning_rate": 3.768937061214435e-05,
      "loss": 0.1704,
      "step": 6594
    },
    {
      "epoch": 2.4353766617429837,
      "grad_norm": 0.31221652030944824,
      "learning_rate": 3.766473703658086e-05,
      "loss": 0.1902,
      "step": 6595
    },
    {
      "epoch": 2.4357459379615953,
      "grad_norm": 0.29108351469039917,
      "learning_rate": 3.764010346101737e-05,
      "loss": 0.1539,
      "step": 6596
    },
    {
      "epoch": 2.436115214180207,
      "grad_norm": 0.2577853798866272,
      "learning_rate": 3.7615469885453875e-05,
      "loss": 0.1775,
      "step": 6597
    },
    {
      "epoch": 2.4364844903988185,
      "grad_norm": 0.29579925537109375,
      "learning_rate": 3.759083630989038e-05,
      "loss": 0.1762,
      "step": 6598
    },
    {
      "epoch": 2.4368537666174297,
      "grad_norm": 0.2631477415561676,
      "learning_rate": 3.756620273432689e-05,
      "loss": 0.1661,
      "step": 6599
    },
    {
      "epoch": 2.4372230428360413,
      "grad_norm": 0.2739509344100952,
      "learning_rate": 3.75415691587634e-05,
      "loss": 0.1698,
      "step": 6600
    },
    {
      "epoch": 2.4372230428360413,
      "eval_loss": 0.25389644503593445,
      "eval_runtime": 5.8427,
      "eval_samples_per_second": 8.558,
      "eval_steps_per_second": 1.198,
      "step": 6600
    },
    {
      "epoch": 2.437592319054653,
      "grad_norm": 0.36615848541259766,
      "learning_rate": 3.75169355831999e-05,
      "loss": 0.1729,
      "step": 6601
    },
    {
      "epoch": 2.4379615952732645,
      "grad_norm": 0.27296942472457886,
      "learning_rate": 3.749230200763641e-05,
      "loss": 0.1624,
      "step": 6602
    },
    {
      "epoch": 2.438330871491876,
      "grad_norm": 0.27478161454200745,
      "learning_rate": 3.7467668432072915e-05,
      "loss": 0.1428,
      "step": 6603
    },
    {
      "epoch": 2.4387001477104873,
      "grad_norm": 0.28324443101882935,
      "learning_rate": 3.744303485650942e-05,
      "loss": 0.1883,
      "step": 6604
    },
    {
      "epoch": 2.439069423929099,
      "grad_norm": 0.29894813895225525,
      "learning_rate": 3.741840128094593e-05,
      "loss": 0.1899,
      "step": 6605
    },
    {
      "epoch": 2.4394387001477105,
      "grad_norm": 0.3002159595489502,
      "learning_rate": 3.739376770538244e-05,
      "loss": 0.1571,
      "step": 6606
    },
    {
      "epoch": 2.439807976366322,
      "grad_norm": 0.274058073759079,
      "learning_rate": 3.736913412981895e-05,
      "loss": 0.1624,
      "step": 6607
    },
    {
      "epoch": 2.4401772525849337,
      "grad_norm": 0.27943941950798035,
      "learning_rate": 3.7344500554255455e-05,
      "loss": 0.1609,
      "step": 6608
    },
    {
      "epoch": 2.4405465288035453,
      "grad_norm": 0.24010521173477173,
      "learning_rate": 3.7319866978691956e-05,
      "loss": 0.1383,
      "step": 6609
    },
    {
      "epoch": 2.4409158050221564,
      "grad_norm": 0.28225481510162354,
      "learning_rate": 3.7295233403128464e-05,
      "loss": 0.172,
      "step": 6610
    },
    {
      "epoch": 2.441285081240768,
      "grad_norm": 0.26481708884239197,
      "learning_rate": 3.727059982756497e-05,
      "loss": 0.1562,
      "step": 6611
    },
    {
      "epoch": 2.4416543574593796,
      "grad_norm": 0.2832199037075043,
      "learning_rate": 3.724596625200148e-05,
      "loss": 0.1851,
      "step": 6612
    },
    {
      "epoch": 2.4420236336779912,
      "grad_norm": 0.2782345712184906,
      "learning_rate": 3.722133267643799e-05,
      "loss": 0.1718,
      "step": 6613
    },
    {
      "epoch": 2.442392909896603,
      "grad_norm": 0.28615009784698486,
      "learning_rate": 3.7196699100874495e-05,
      "loss": 0.1611,
      "step": 6614
    },
    {
      "epoch": 2.442762186115214,
      "grad_norm": 0.30237308144569397,
      "learning_rate": 3.7172065525311e-05,
      "loss": 0.1778,
      "step": 6615
    },
    {
      "epoch": 2.4431314623338256,
      "grad_norm": 0.26544320583343506,
      "learning_rate": 3.714743194974751e-05,
      "loss": 0.162,
      "step": 6616
    },
    {
      "epoch": 2.443500738552437,
      "grad_norm": 0.2903136610984802,
      "learning_rate": 3.712279837418401e-05,
      "loss": 0.1553,
      "step": 6617
    },
    {
      "epoch": 2.443870014771049,
      "grad_norm": 0.26397714018821716,
      "learning_rate": 3.709816479862052e-05,
      "loss": 0.1678,
      "step": 6618
    },
    {
      "epoch": 2.4442392909896604,
      "grad_norm": 0.2656189799308777,
      "learning_rate": 3.707353122305703e-05,
      "loss": 0.1562,
      "step": 6619
    },
    {
      "epoch": 2.444608567208272,
      "grad_norm": 0.2826843857765198,
      "learning_rate": 3.7048897647493536e-05,
      "loss": 0.1817,
      "step": 6620
    },
    {
      "epoch": 2.444977843426883,
      "grad_norm": 0.2653213441371918,
      "learning_rate": 3.7024264071930043e-05,
      "loss": 0.1675,
      "step": 6621
    },
    {
      "epoch": 2.445347119645495,
      "grad_norm": 0.30040302872657776,
      "learning_rate": 3.699963049636655e-05,
      "loss": 0.1999,
      "step": 6622
    },
    {
      "epoch": 2.4457163958641064,
      "grad_norm": 0.30907782912254333,
      "learning_rate": 3.697499692080306e-05,
      "loss": 0.1861,
      "step": 6623
    },
    {
      "epoch": 2.446085672082718,
      "grad_norm": 0.2911081612110138,
      "learning_rate": 3.695036334523957e-05,
      "loss": 0.1661,
      "step": 6624
    },
    {
      "epoch": 2.446454948301329,
      "grad_norm": 0.2689531743526459,
      "learning_rate": 3.692572976967607e-05,
      "loss": 0.1687,
      "step": 6625
    },
    {
      "epoch": 2.4468242245199407,
      "grad_norm": 0.3597908914089203,
      "learning_rate": 3.6901096194112576e-05,
      "loss": 0.203,
      "step": 6626
    },
    {
      "epoch": 2.4471935007385524,
      "grad_norm": 0.2764434814453125,
      "learning_rate": 3.6876462618549084e-05,
      "loss": 0.1758,
      "step": 6627
    },
    {
      "epoch": 2.447562776957164,
      "grad_norm": 0.25817057490348816,
      "learning_rate": 3.685182904298559e-05,
      "loss": 0.158,
      "step": 6628
    },
    {
      "epoch": 2.4479320531757756,
      "grad_norm": 0.27736422419548035,
      "learning_rate": 3.68271954674221e-05,
      "loss": 0.1543,
      "step": 6629
    },
    {
      "epoch": 2.448301329394387,
      "grad_norm": 0.3356666564941406,
      "learning_rate": 3.680256189185861e-05,
      "loss": 0.172,
      "step": 6630
    },
    {
      "epoch": 2.4486706056129988,
      "grad_norm": 0.3232395350933075,
      "learning_rate": 3.6777928316295115e-05,
      "loss": 0.2051,
      "step": 6631
    },
    {
      "epoch": 2.44903988183161,
      "grad_norm": 0.3052434027194977,
      "learning_rate": 3.6753294740731616e-05,
      "loss": 0.1914,
      "step": 6632
    },
    {
      "epoch": 2.4494091580502215,
      "grad_norm": 0.3377300500869751,
      "learning_rate": 3.6728661165168124e-05,
      "loss": 0.1866,
      "step": 6633
    },
    {
      "epoch": 2.449778434268833,
      "grad_norm": 0.2937794327735901,
      "learning_rate": 3.670402758960463e-05,
      "loss": 0.1442,
      "step": 6634
    },
    {
      "epoch": 2.4501477104874447,
      "grad_norm": 0.37502825260162354,
      "learning_rate": 3.667939401404114e-05,
      "loss": 0.1719,
      "step": 6635
    },
    {
      "epoch": 2.450516986706056,
      "grad_norm": 0.25513651967048645,
      "learning_rate": 3.665476043847765e-05,
      "loss": 0.1497,
      "step": 6636
    },
    {
      "epoch": 2.4508862629246675,
      "grad_norm": 0.2909369468688965,
      "learning_rate": 3.6630126862914156e-05,
      "loss": 0.1646,
      "step": 6637
    },
    {
      "epoch": 2.451255539143279,
      "grad_norm": 0.24133659899234772,
      "learning_rate": 3.6605493287350664e-05,
      "loss": 0.1582,
      "step": 6638
    },
    {
      "epoch": 2.4516248153618907,
      "grad_norm": 0.27683207392692566,
      "learning_rate": 3.658085971178717e-05,
      "loss": 0.1535,
      "step": 6639
    },
    {
      "epoch": 2.4519940915805023,
      "grad_norm": 0.2554895877838135,
      "learning_rate": 3.655622613622367e-05,
      "loss": 0.1641,
      "step": 6640
    },
    {
      "epoch": 2.452363367799114,
      "grad_norm": 0.3127109408378601,
      "learning_rate": 3.653159256066018e-05,
      "loss": 0.1911,
      "step": 6641
    },
    {
      "epoch": 2.4527326440177255,
      "grad_norm": 0.28088274598121643,
      "learning_rate": 3.650695898509669e-05,
      "loss": 0.184,
      "step": 6642
    },
    {
      "epoch": 2.4531019202363367,
      "grad_norm": 0.29056841135025024,
      "learning_rate": 3.6482325409533196e-05,
      "loss": 0.1691,
      "step": 6643
    },
    {
      "epoch": 2.4534711964549483,
      "grad_norm": 0.24302606284618378,
      "learning_rate": 3.6457691833969704e-05,
      "loss": 0.1638,
      "step": 6644
    },
    {
      "epoch": 2.45384047267356,
      "grad_norm": 0.2458467334508896,
      "learning_rate": 3.643305825840621e-05,
      "loss": 0.1537,
      "step": 6645
    },
    {
      "epoch": 2.4542097488921715,
      "grad_norm": 0.3112828731536865,
      "learning_rate": 3.640842468284272e-05,
      "loss": 0.1683,
      "step": 6646
    },
    {
      "epoch": 2.4545790251107826,
      "grad_norm": 0.2735547423362732,
      "learning_rate": 3.638379110727923e-05,
      "loss": 0.1795,
      "step": 6647
    },
    {
      "epoch": 2.4549483013293942,
      "grad_norm": 0.2628132700920105,
      "learning_rate": 3.635915753171573e-05,
      "loss": 0.1769,
      "step": 6648
    },
    {
      "epoch": 2.455317577548006,
      "grad_norm": 0.3441419303417206,
      "learning_rate": 3.6334523956152236e-05,
      "loss": 0.1899,
      "step": 6649
    },
    {
      "epoch": 2.4556868537666174,
      "grad_norm": 0.31522902846336365,
      "learning_rate": 3.6309890380588744e-05,
      "loss": 0.2296,
      "step": 6650
    },
    {
      "epoch": 2.4556868537666174,
      "eval_loss": 0.2550058662891388,
      "eval_runtime": 5.8477,
      "eval_samples_per_second": 8.55,
      "eval_steps_per_second": 1.197,
      "step": 6650
    },
    {
      "epoch": 2.456056129985229,
      "grad_norm": 0.24651025235652924,
      "learning_rate": 3.628525680502525e-05,
      "loss": 0.1598,
      "step": 6651
    },
    {
      "epoch": 2.4564254062038406,
      "grad_norm": 0.28866273164749146,
      "learning_rate": 3.626062322946176e-05,
      "loss": 0.1813,
      "step": 6652
    },
    {
      "epoch": 2.456794682422452,
      "grad_norm": 0.39629366993904114,
      "learning_rate": 3.623598965389827e-05,
      "loss": 0.189,
      "step": 6653
    },
    {
      "epoch": 2.4571639586410634,
      "grad_norm": 0.29161256551742554,
      "learning_rate": 3.6211356078334776e-05,
      "loss": 0.1619,
      "step": 6654
    },
    {
      "epoch": 2.457533234859675,
      "grad_norm": 0.21428142488002777,
      "learning_rate": 3.6186722502771284e-05,
      "loss": 0.1619,
      "step": 6655
    },
    {
      "epoch": 2.4579025110782866,
      "grad_norm": 0.23037759959697723,
      "learning_rate": 3.6162088927207785e-05,
      "loss": 0.1636,
      "step": 6656
    },
    {
      "epoch": 2.458271787296898,
      "grad_norm": 0.25886794924736023,
      "learning_rate": 3.613745535164429e-05,
      "loss": 0.1746,
      "step": 6657
    },
    {
      "epoch": 2.4586410635155094,
      "grad_norm": 0.27223896980285645,
      "learning_rate": 3.61128217760808e-05,
      "loss": 0.1576,
      "step": 6658
    },
    {
      "epoch": 2.459010339734121,
      "grad_norm": 0.31890416145324707,
      "learning_rate": 3.608818820051731e-05,
      "loss": 0.1725,
      "step": 6659
    },
    {
      "epoch": 2.4593796159527326,
      "grad_norm": 0.2658154368400574,
      "learning_rate": 3.6063554624953816e-05,
      "loss": 0.1582,
      "step": 6660
    },
    {
      "epoch": 2.459748892171344,
      "grad_norm": 0.2270040065050125,
      "learning_rate": 3.6038921049390324e-05,
      "loss": 0.1548,
      "step": 6661
    },
    {
      "epoch": 2.460118168389956,
      "grad_norm": 0.29490986466407776,
      "learning_rate": 3.601428747382683e-05,
      "loss": 0.1735,
      "step": 6662
    },
    {
      "epoch": 2.4604874446085674,
      "grad_norm": 0.23410217463970184,
      "learning_rate": 3.598965389826334e-05,
      "loss": 0.1563,
      "step": 6663
    },
    {
      "epoch": 2.4608567208271785,
      "grad_norm": 0.2288871556520462,
      "learning_rate": 3.596502032269984e-05,
      "loss": 0.1553,
      "step": 6664
    },
    {
      "epoch": 2.46122599704579,
      "grad_norm": 0.2695292830467224,
      "learning_rate": 3.594038674713635e-05,
      "loss": 0.1783,
      "step": 6665
    },
    {
      "epoch": 2.4615952732644018,
      "grad_norm": 0.24455784261226654,
      "learning_rate": 3.5915753171572857e-05,
      "loss": 0.1736,
      "step": 6666
    },
    {
      "epoch": 2.4619645494830134,
      "grad_norm": 0.28997039794921875,
      "learning_rate": 3.5891119596009364e-05,
      "loss": 0.175,
      "step": 6667
    },
    {
      "epoch": 2.462333825701625,
      "grad_norm": 0.2736116051673889,
      "learning_rate": 3.586648602044587e-05,
      "loss": 0.1661,
      "step": 6668
    },
    {
      "epoch": 2.462703101920236,
      "grad_norm": 0.23794032633304596,
      "learning_rate": 3.584185244488238e-05,
      "loss": 0.1631,
      "step": 6669
    },
    {
      "epoch": 2.4630723781388477,
      "grad_norm": 0.2668376564979553,
      "learning_rate": 3.581721886931889e-05,
      "loss": 0.1798,
      "step": 6670
    },
    {
      "epoch": 2.4634416543574593,
      "grad_norm": 0.25068479776382446,
      "learning_rate": 3.5792585293755396e-05,
      "loss": 0.1536,
      "step": 6671
    },
    {
      "epoch": 2.463810930576071,
      "grad_norm": 0.28217265009880066,
      "learning_rate": 3.57679517181919e-05,
      "loss": 0.1651,
      "step": 6672
    },
    {
      "epoch": 2.4641802067946825,
      "grad_norm": 0.2928770184516907,
      "learning_rate": 3.5743318142628405e-05,
      "loss": 0.1854,
      "step": 6673
    },
    {
      "epoch": 2.464549483013294,
      "grad_norm": 0.2543809115886688,
      "learning_rate": 3.571868456706491e-05,
      "loss": 0.1467,
      "step": 6674
    },
    {
      "epoch": 2.4649187592319053,
      "grad_norm": 0.2677014470100403,
      "learning_rate": 3.5694050991501414e-05,
      "loss": 0.1836,
      "step": 6675
    },
    {
      "epoch": 2.465288035450517,
      "grad_norm": 0.24781964719295502,
      "learning_rate": 3.566941741593792e-05,
      "loss": 0.1594,
      "step": 6676
    },
    {
      "epoch": 2.4656573116691285,
      "grad_norm": 0.2676795721054077,
      "learning_rate": 3.564478384037443e-05,
      "loss": 0.1631,
      "step": 6677
    },
    {
      "epoch": 2.46602658788774,
      "grad_norm": 0.23839810490608215,
      "learning_rate": 3.562015026481094e-05,
      "loss": 0.145,
      "step": 6678
    },
    {
      "epoch": 2.4663958641063517,
      "grad_norm": 0.2979888916015625,
      "learning_rate": 3.5595516689247445e-05,
      "loss": 0.1834,
      "step": 6679
    },
    {
      "epoch": 2.466765140324963,
      "grad_norm": 0.2651112377643585,
      "learning_rate": 3.557088311368395e-05,
      "loss": 0.1575,
      "step": 6680
    },
    {
      "epoch": 2.4671344165435745,
      "grad_norm": 0.32368481159210205,
      "learning_rate": 3.5546249538120454e-05,
      "loss": 0.1976,
      "step": 6681
    },
    {
      "epoch": 2.467503692762186,
      "grad_norm": 0.2911125421524048,
      "learning_rate": 3.552161596255696e-05,
      "loss": 0.1685,
      "step": 6682
    },
    {
      "epoch": 2.4678729689807977,
      "grad_norm": 0.3357445299625397,
      "learning_rate": 3.549698238699347e-05,
      "loss": 0.1976,
      "step": 6683
    },
    {
      "epoch": 2.4682422451994093,
      "grad_norm": 0.3251952826976776,
      "learning_rate": 3.547234881142998e-05,
      "loss": 0.2145,
      "step": 6684
    },
    {
      "epoch": 2.468611521418021,
      "grad_norm": 0.2840099334716797,
      "learning_rate": 3.5447715235866486e-05,
      "loss": 0.1705,
      "step": 6685
    },
    {
      "epoch": 2.468980797636632,
      "grad_norm": 0.25191012024879456,
      "learning_rate": 3.5423081660302994e-05,
      "loss": 0.1557,
      "step": 6686
    },
    {
      "epoch": 2.4693500738552436,
      "grad_norm": 0.27351176738739014,
      "learning_rate": 3.53984480847395e-05,
      "loss": 0.1552,
      "step": 6687
    },
    {
      "epoch": 2.4697193500738552,
      "grad_norm": 0.3187665641307831,
      "learning_rate": 3.537381450917601e-05,
      "loss": 0.1968,
      "step": 6688
    },
    {
      "epoch": 2.470088626292467,
      "grad_norm": 0.24932366609573364,
      "learning_rate": 3.534918093361251e-05,
      "loss": 0.1718,
      "step": 6689
    },
    {
      "epoch": 2.4704579025110784,
      "grad_norm": 0.2778686583042145,
      "learning_rate": 3.532454735804902e-05,
      "loss": 0.1613,
      "step": 6690
    },
    {
      "epoch": 2.4708271787296896,
      "grad_norm": 0.22028721868991852,
      "learning_rate": 3.5299913782485526e-05,
      "loss": 0.1686,
      "step": 6691
    },
    {
      "epoch": 2.471196454948301,
      "grad_norm": 0.30668461322784424,
      "learning_rate": 3.5275280206922034e-05,
      "loss": 0.1736,
      "step": 6692
    },
    {
      "epoch": 2.471565731166913,
      "grad_norm": 0.3277914524078369,
      "learning_rate": 3.525064663135854e-05,
      "loss": 0.1869,
      "step": 6693
    },
    {
      "epoch": 2.4719350073855244,
      "grad_norm": 0.28965499997138977,
      "learning_rate": 3.522601305579505e-05,
      "loss": 0.1597,
      "step": 6694
    },
    {
      "epoch": 2.472304283604136,
      "grad_norm": 0.2699742317199707,
      "learning_rate": 3.520137948023156e-05,
      "loss": 0.181,
      "step": 6695
    },
    {
      "epoch": 2.4726735598227476,
      "grad_norm": 0.24233360588550568,
      "learning_rate": 3.5176745904668065e-05,
      "loss": 0.1433,
      "step": 6696
    },
    {
      "epoch": 2.4730428360413588,
      "grad_norm": 0.32054582238197327,
      "learning_rate": 3.5152112329104566e-05,
      "loss": 0.1659,
      "step": 6697
    },
    {
      "epoch": 2.4734121122599704,
      "grad_norm": 0.2580467164516449,
      "learning_rate": 3.5127478753541074e-05,
      "loss": 0.1577,
      "step": 6698
    },
    {
      "epoch": 2.473781388478582,
      "grad_norm": 0.2700013816356659,
      "learning_rate": 3.510284517797758e-05,
      "loss": 0.1625,
      "step": 6699
    },
    {
      "epoch": 2.4741506646971936,
      "grad_norm": 0.2962573766708374,
      "learning_rate": 3.507821160241409e-05,
      "loss": 0.1878,
      "step": 6700
    },
    {
      "epoch": 2.4741506646971936,
      "eval_loss": 0.25414416193962097,
      "eval_runtime": 5.8547,
      "eval_samples_per_second": 8.54,
      "eval_steps_per_second": 1.196,
      "step": 6700
    },
    {
      "epoch": 2.474519940915805,
      "grad_norm": 0.2274537831544876,
      "learning_rate": 3.50535780268506e-05,
      "loss": 0.1547,
      "step": 6701
    },
    {
      "epoch": 2.4748892171344163,
      "grad_norm": 0.22232764959335327,
      "learning_rate": 3.5028944451287106e-05,
      "loss": 0.1338,
      "step": 6702
    },
    {
      "epoch": 2.475258493353028,
      "grad_norm": 0.253587007522583,
      "learning_rate": 3.5004310875723614e-05,
      "loss": 0.1682,
      "step": 6703
    },
    {
      "epoch": 2.4756277695716395,
      "grad_norm": 0.22102652490139008,
      "learning_rate": 3.497967730016012e-05,
      "loss": 0.1486,
      "step": 6704
    },
    {
      "epoch": 2.475997045790251,
      "grad_norm": 0.2886812686920166,
      "learning_rate": 3.495504372459662e-05,
      "loss": 0.1773,
      "step": 6705
    },
    {
      "epoch": 2.4763663220088628,
      "grad_norm": 0.2909698188304901,
      "learning_rate": 3.493041014903313e-05,
      "loss": 0.1819,
      "step": 6706
    },
    {
      "epoch": 2.4767355982274744,
      "grad_norm": 0.2511613368988037,
      "learning_rate": 3.490577657346964e-05,
      "loss": 0.1841,
      "step": 6707
    },
    {
      "epoch": 2.4771048744460855,
      "grad_norm": 0.28402179479599,
      "learning_rate": 3.4881142997906146e-05,
      "loss": 0.1709,
      "step": 6708
    },
    {
      "epoch": 2.477474150664697,
      "grad_norm": 0.24826565384864807,
      "learning_rate": 3.4856509422342654e-05,
      "loss": 0.1599,
      "step": 6709
    },
    {
      "epoch": 2.4778434268833087,
      "grad_norm": 0.28324204683303833,
      "learning_rate": 3.483187584677916e-05,
      "loss": 0.1763,
      "step": 6710
    },
    {
      "epoch": 2.4782127031019203,
      "grad_norm": 0.26067647337913513,
      "learning_rate": 3.480724227121567e-05,
      "loss": 0.1672,
      "step": 6711
    },
    {
      "epoch": 2.478581979320532,
      "grad_norm": 0.2550486624240875,
      "learning_rate": 3.478260869565218e-05,
      "loss": 0.1672,
      "step": 6712
    },
    {
      "epoch": 2.478951255539143,
      "grad_norm": 0.3083842992782593,
      "learning_rate": 3.475797512008868e-05,
      "loss": 0.1878,
      "step": 6713
    },
    {
      "epoch": 2.4793205317577547,
      "grad_norm": 0.32653459906578064,
      "learning_rate": 3.4733341544525187e-05,
      "loss": 0.1996,
      "step": 6714
    },
    {
      "epoch": 2.4796898079763663,
      "grad_norm": 0.24006037414073944,
      "learning_rate": 3.4708707968961694e-05,
      "loss": 0.1663,
      "step": 6715
    },
    {
      "epoch": 2.480059084194978,
      "grad_norm": 0.27943891286849976,
      "learning_rate": 3.46840743933982e-05,
      "loss": 0.1777,
      "step": 6716
    },
    {
      "epoch": 2.4804283604135895,
      "grad_norm": 0.2611193358898163,
      "learning_rate": 3.465944081783471e-05,
      "loss": 0.1849,
      "step": 6717
    },
    {
      "epoch": 2.480797636632201,
      "grad_norm": 0.2858585715293884,
      "learning_rate": 3.463480724227122e-05,
      "loss": 0.1606,
      "step": 6718
    },
    {
      "epoch": 2.4811669128508123,
      "grad_norm": 0.28120550513267517,
      "learning_rate": 3.4610173666707726e-05,
      "loss": 0.1864,
      "step": 6719
    },
    {
      "epoch": 2.481536189069424,
      "grad_norm": 0.26465487480163574,
      "learning_rate": 3.4585540091144234e-05,
      "loss": 0.1571,
      "step": 6720
    },
    {
      "epoch": 2.4819054652880355,
      "grad_norm": 0.29018014669418335,
      "learning_rate": 3.4560906515580735e-05,
      "loss": 0.1748,
      "step": 6721
    },
    {
      "epoch": 2.482274741506647,
      "grad_norm": 0.24420779943466187,
      "learning_rate": 3.453627294001724e-05,
      "loss": 0.1589,
      "step": 6722
    },
    {
      "epoch": 2.4826440177252587,
      "grad_norm": 0.2638775110244751,
      "learning_rate": 3.451163936445375e-05,
      "loss": 0.1472,
      "step": 6723
    },
    {
      "epoch": 2.48301329394387,
      "grad_norm": 0.3645317554473877,
      "learning_rate": 3.448700578889026e-05,
      "loss": 0.1584,
      "step": 6724
    },
    {
      "epoch": 2.4833825701624814,
      "grad_norm": 0.2596351206302643,
      "learning_rate": 3.4462372213326766e-05,
      "loss": 0.1658,
      "step": 6725
    },
    {
      "epoch": 2.483751846381093,
      "grad_norm": 0.24260061979293823,
      "learning_rate": 3.4437738637763274e-05,
      "loss": 0.1519,
      "step": 6726
    },
    {
      "epoch": 2.4841211225997046,
      "grad_norm": 0.26800626516342163,
      "learning_rate": 3.441310506219978e-05,
      "loss": 0.1424,
      "step": 6727
    },
    {
      "epoch": 2.4844903988183162,
      "grad_norm": 0.313998818397522,
      "learning_rate": 3.438847148663629e-05,
      "loss": 0.1915,
      "step": 6728
    },
    {
      "epoch": 2.484859675036928,
      "grad_norm": 0.2721114158630371,
      "learning_rate": 3.436383791107279e-05,
      "loss": 0.1569,
      "step": 6729
    },
    {
      "epoch": 2.485228951255539,
      "grad_norm": 0.29268237948417664,
      "learning_rate": 3.43392043355093e-05,
      "loss": 0.1669,
      "step": 6730
    },
    {
      "epoch": 2.4855982274741506,
      "grad_norm": 0.2563331425189972,
      "learning_rate": 3.431457075994581e-05,
      "loss": 0.1583,
      "step": 6731
    },
    {
      "epoch": 2.485967503692762,
      "grad_norm": 0.3128591477870941,
      "learning_rate": 3.4289937184382315e-05,
      "loss": 0.1856,
      "step": 6732
    },
    {
      "epoch": 2.486336779911374,
      "grad_norm": 0.28177300095558167,
      "learning_rate": 3.426530360881882e-05,
      "loss": 0.1829,
      "step": 6733
    },
    {
      "epoch": 2.4867060561299854,
      "grad_norm": 0.26359695196151733,
      "learning_rate": 3.424067003325533e-05,
      "loss": 0.1609,
      "step": 6734
    },
    {
      "epoch": 2.4870753323485966,
      "grad_norm": 0.29498758912086487,
      "learning_rate": 3.421603645769184e-05,
      "loss": 0.1838,
      "step": 6735
    },
    {
      "epoch": 2.487444608567208,
      "grad_norm": 0.2749401926994324,
      "learning_rate": 3.4191402882128346e-05,
      "loss": 0.1724,
      "step": 6736
    },
    {
      "epoch": 2.4878138847858198,
      "grad_norm": 0.2710501551628113,
      "learning_rate": 3.416676930656485e-05,
      "loss": 0.1519,
      "step": 6737
    },
    {
      "epoch": 2.4881831610044314,
      "grad_norm": 0.24372336268424988,
      "learning_rate": 3.4142135731001355e-05,
      "loss": 0.1658,
      "step": 6738
    },
    {
      "epoch": 2.488552437223043,
      "grad_norm": 0.3472476005554199,
      "learning_rate": 3.411750215543786e-05,
      "loss": 0.1919,
      "step": 6739
    },
    {
      "epoch": 2.4889217134416546,
      "grad_norm": 0.28122809529304504,
      "learning_rate": 3.409286857987437e-05,
      "loss": 0.1785,
      "step": 6740
    },
    {
      "epoch": 2.4892909896602657,
      "grad_norm": 0.3095254600048065,
      "learning_rate": 3.406823500431088e-05,
      "loss": 0.1736,
      "step": 6741
    },
    {
      "epoch": 2.4896602658788773,
      "grad_norm": 0.2329387664794922,
      "learning_rate": 3.4043601428747386e-05,
      "loss": 0.1461,
      "step": 6742
    },
    {
      "epoch": 2.490029542097489,
      "grad_norm": 0.2703981399536133,
      "learning_rate": 3.4018967853183894e-05,
      "loss": 0.1653,
      "step": 6743
    },
    {
      "epoch": 2.4903988183161005,
      "grad_norm": 0.29018229246139526,
      "learning_rate": 3.39943342776204e-05,
      "loss": 0.1673,
      "step": 6744
    },
    {
      "epoch": 2.490768094534712,
      "grad_norm": 0.2717873156070709,
      "learning_rate": 3.39697007020569e-05,
      "loss": 0.1883,
      "step": 6745
    },
    {
      "epoch": 2.4911373707533233,
      "grad_norm": 0.32639437913894653,
      "learning_rate": 3.394506712649341e-05,
      "loss": 0.1821,
      "step": 6746
    },
    {
      "epoch": 2.491506646971935,
      "grad_norm": 0.29910680651664734,
      "learning_rate": 3.392043355092992e-05,
      "loss": 0.1558,
      "step": 6747
    },
    {
      "epoch": 2.4918759231905465,
      "grad_norm": 0.2277168482542038,
      "learning_rate": 3.389579997536643e-05,
      "loss": 0.1595,
      "step": 6748
    },
    {
      "epoch": 2.492245199409158,
      "grad_norm": 0.26746252179145813,
      "learning_rate": 3.3871166399802935e-05,
      "loss": 0.1786,
      "step": 6749
    },
    {
      "epoch": 2.4926144756277697,
      "grad_norm": 0.263317346572876,
      "learning_rate": 3.384653282423944e-05,
      "loss": 0.1937,
      "step": 6750
    },
    {
      "epoch": 2.4926144756277697,
      "eval_loss": 0.2532287538051605,
      "eval_runtime": 5.8572,
      "eval_samples_per_second": 8.537,
      "eval_steps_per_second": 1.195,
      "step": 6750
    },
    {
      "epoch": 2.4929837518463813,
      "grad_norm": 0.26293596625328064,
      "learning_rate": 3.382189924867595e-05,
      "loss": 0.1526,
      "step": 6751
    },
    {
      "epoch": 2.4933530280649925,
      "grad_norm": 0.2595967948436737,
      "learning_rate": 3.379726567311245e-05,
      "loss": 0.1743,
      "step": 6752
    },
    {
      "epoch": 2.493722304283604,
      "grad_norm": 0.3618583381175995,
      "learning_rate": 3.377263209754896e-05,
      "loss": 0.1784,
      "step": 6753
    },
    {
      "epoch": 2.4940915805022157,
      "grad_norm": 0.27984780073165894,
      "learning_rate": 3.374799852198547e-05,
      "loss": 0.1768,
      "step": 6754
    },
    {
      "epoch": 2.4944608567208273,
      "grad_norm": 0.26132699847221375,
      "learning_rate": 3.3723364946421975e-05,
      "loss": 0.1723,
      "step": 6755
    },
    {
      "epoch": 2.494830132939439,
      "grad_norm": 0.3069780468940735,
      "learning_rate": 3.369873137085848e-05,
      "loss": 0.1671,
      "step": 6756
    },
    {
      "epoch": 2.49519940915805,
      "grad_norm": 0.30445432662963867,
      "learning_rate": 3.367409779529499e-05,
      "loss": 0.1514,
      "step": 6757
    },
    {
      "epoch": 2.4955686853766617,
      "grad_norm": 0.25964003801345825,
      "learning_rate": 3.36494642197315e-05,
      "loss": 0.1521,
      "step": 6758
    },
    {
      "epoch": 2.4959379615952733,
      "grad_norm": 0.2943986654281616,
      "learning_rate": 3.3624830644168007e-05,
      "loss": 0.1674,
      "step": 6759
    },
    {
      "epoch": 2.496307237813885,
      "grad_norm": 0.33864790201187134,
      "learning_rate": 3.360019706860451e-05,
      "loss": 0.1912,
      "step": 6760
    },
    {
      "epoch": 2.4966765140324965,
      "grad_norm": 0.26427775621414185,
      "learning_rate": 3.3575563493041016e-05,
      "loss": 0.1651,
      "step": 6761
    },
    {
      "epoch": 2.497045790251108,
      "grad_norm": 0.30823659896850586,
      "learning_rate": 3.355092991747752e-05,
      "loss": 0.1531,
      "step": 6762
    },
    {
      "epoch": 2.4974150664697192,
      "grad_norm": 0.2890568673610687,
      "learning_rate": 3.352629634191403e-05,
      "loss": 0.1771,
      "step": 6763
    },
    {
      "epoch": 2.497784342688331,
      "grad_norm": 0.2550124526023865,
      "learning_rate": 3.350166276635054e-05,
      "loss": 0.1583,
      "step": 6764
    },
    {
      "epoch": 2.4981536189069424,
      "grad_norm": 0.22946004569530487,
      "learning_rate": 3.347702919078705e-05,
      "loss": 0.1192,
      "step": 6765
    },
    {
      "epoch": 2.498522895125554,
      "grad_norm": 0.24715645611286163,
      "learning_rate": 3.3452395615223555e-05,
      "loss": 0.1557,
      "step": 6766
    },
    {
      "epoch": 2.498892171344165,
      "grad_norm": 0.21769702434539795,
      "learning_rate": 3.342776203966006e-05,
      "loss": 0.1542,
      "step": 6767
    },
    {
      "epoch": 2.499261447562777,
      "grad_norm": 0.2710095942020416,
      "learning_rate": 3.3403128464096564e-05,
      "loss": 0.1518,
      "step": 6768
    },
    {
      "epoch": 2.4996307237813884,
      "grad_norm": 0.2763799726963043,
      "learning_rate": 3.337849488853307e-05,
      "loss": 0.1562,
      "step": 6769
    },
    {
      "epoch": 2.5,
      "grad_norm": 0.28227436542510986,
      "learning_rate": 3.335386131296958e-05,
      "loss": 0.1817,
      "step": 6770
    },
    {
      "epoch": 2.5003692762186116,
      "grad_norm": 0.2575850784778595,
      "learning_rate": 3.332922773740609e-05,
      "loss": 0.1575,
      "step": 6771
    },
    {
      "epoch": 2.500738552437223,
      "grad_norm": 0.3194829523563385,
      "learning_rate": 3.3304594161842595e-05,
      "loss": 0.1685,
      "step": 6772
    },
    {
      "epoch": 2.501107828655835,
      "grad_norm": 0.31217771768569946,
      "learning_rate": 3.32799605862791e-05,
      "loss": 0.1753,
      "step": 6773
    },
    {
      "epoch": 2.501477104874446,
      "grad_norm": 0.24038547277450562,
      "learning_rate": 3.325532701071561e-05,
      "loss": 0.1728,
      "step": 6774
    },
    {
      "epoch": 2.5018463810930576,
      "grad_norm": 0.26459741592407227,
      "learning_rate": 3.323069343515212e-05,
      "loss": 0.1743,
      "step": 6775
    },
    {
      "epoch": 2.502215657311669,
      "grad_norm": 0.2336292862892151,
      "learning_rate": 3.320605985958862e-05,
      "loss": 0.1599,
      "step": 6776
    },
    {
      "epoch": 2.5025849335302808,
      "grad_norm": 0.29795417189598083,
      "learning_rate": 3.318142628402513e-05,
      "loss": 0.1805,
      "step": 6777
    },
    {
      "epoch": 2.502954209748892,
      "grad_norm": 0.25662103295326233,
      "learning_rate": 3.3156792708461636e-05,
      "loss": 0.1557,
      "step": 6778
    },
    {
      "epoch": 2.5033234859675035,
      "grad_norm": 0.29031679034233093,
      "learning_rate": 3.3132159132898144e-05,
      "loss": 0.1543,
      "step": 6779
    },
    {
      "epoch": 2.503692762186115,
      "grad_norm": 0.23315390944480896,
      "learning_rate": 3.310752555733465e-05,
      "loss": 0.1558,
      "step": 6780
    },
    {
      "epoch": 2.5040620384047267,
      "grad_norm": 0.3676280677318573,
      "learning_rate": 3.308289198177116e-05,
      "loss": 0.1895,
      "step": 6781
    },
    {
      "epoch": 2.5044313146233383,
      "grad_norm": 0.32318052649497986,
      "learning_rate": 3.305825840620767e-05,
      "loss": 0.1566,
      "step": 6782
    },
    {
      "epoch": 2.50480059084195,
      "grad_norm": 0.2895708978176117,
      "learning_rate": 3.3033624830644175e-05,
      "loss": 0.1607,
      "step": 6783
    },
    {
      "epoch": 2.5051698670605616,
      "grad_norm": 0.23804035782814026,
      "learning_rate": 3.3008991255080676e-05,
      "loss": 0.1519,
      "step": 6784
    },
    {
      "epoch": 2.5055391432791727,
      "grad_norm": 0.2892438471317291,
      "learning_rate": 3.2984357679517184e-05,
      "loss": 0.1555,
      "step": 6785
    },
    {
      "epoch": 2.5059084194977843,
      "grad_norm": 0.2570165693759918,
      "learning_rate": 3.295972410395369e-05,
      "loss": 0.1674,
      "step": 6786
    },
    {
      "epoch": 2.506277695716396,
      "grad_norm": 0.24235954880714417,
      "learning_rate": 3.29350905283902e-05,
      "loss": 0.1566,
      "step": 6787
    },
    {
      "epoch": 2.5066469719350075,
      "grad_norm": 0.2593287527561188,
      "learning_rate": 3.291045695282671e-05,
      "loss": 0.1506,
      "step": 6788
    },
    {
      "epoch": 2.5070162481536187,
      "grad_norm": 0.2729012370109558,
      "learning_rate": 3.2885823377263215e-05,
      "loss": 0.1516,
      "step": 6789
    },
    {
      "epoch": 2.5073855243722303,
      "grad_norm": 0.26710566878318787,
      "learning_rate": 3.286118980169972e-05,
      "loss": 0.1633,
      "step": 6790
    },
    {
      "epoch": 2.507754800590842,
      "grad_norm": 0.23136357963085175,
      "learning_rate": 3.2836556226136224e-05,
      "loss": 0.1524,
      "step": 6791
    },
    {
      "epoch": 2.5081240768094535,
      "grad_norm": 0.23863689601421356,
      "learning_rate": 3.281192265057273e-05,
      "loss": 0.1726,
      "step": 6792
    },
    {
      "epoch": 2.508493353028065,
      "grad_norm": 0.3121388554573059,
      "learning_rate": 3.278728907500923e-05,
      "loss": 0.1622,
      "step": 6793
    },
    {
      "epoch": 2.5088626292466767,
      "grad_norm": 0.22068506479263306,
      "learning_rate": 3.276265549944574e-05,
      "loss": 0.1393,
      "step": 6794
    },
    {
      "epoch": 2.5092319054652883,
      "grad_norm": 0.27219098806381226,
      "learning_rate": 3.273802192388225e-05,
      "loss": 0.1447,
      "step": 6795
    },
    {
      "epoch": 2.5096011816838995,
      "grad_norm": 0.273375928401947,
      "learning_rate": 3.271338834831876e-05,
      "loss": 0.1769,
      "step": 6796
    },
    {
      "epoch": 2.509970457902511,
      "grad_norm": 0.27035534381866455,
      "learning_rate": 3.2688754772755265e-05,
      "loss": 0.1785,
      "step": 6797
    },
    {
      "epoch": 2.5103397341211227,
      "grad_norm": 0.24310755729675293,
      "learning_rate": 3.266412119719177e-05,
      "loss": 0.1549,
      "step": 6798
    },
    {
      "epoch": 2.5107090103397343,
      "grad_norm": 0.2548632323741913,
      "learning_rate": 3.263948762162828e-05,
      "loss": 0.1599,
      "step": 6799
    },
    {
      "epoch": 2.5110782865583454,
      "grad_norm": 0.2806619107723236,
      "learning_rate": 3.261485404606479e-05,
      "loss": 0.1869,
      "step": 6800
    },
    {
      "epoch": 2.5110782865583454,
      "eval_loss": 0.2532981038093567,
      "eval_runtime": 5.8583,
      "eval_samples_per_second": 8.535,
      "eval_steps_per_second": 1.195,
      "step": 6800
    },
    {
      "epoch": 2.511447562776957,
      "grad_norm": 0.2640089988708496,
      "learning_rate": 3.259022047050129e-05,
      "loss": 0.1515,
      "step": 6801
    },
    {
      "epoch": 2.5118168389955686,
      "grad_norm": 0.32389867305755615,
      "learning_rate": 3.25655868949378e-05,
      "loss": 0.1627,
      "step": 6802
    },
    {
      "epoch": 2.5121861152141802,
      "grad_norm": 0.2391839176416397,
      "learning_rate": 3.2540953319374305e-05,
      "loss": 0.1321,
      "step": 6803
    },
    {
      "epoch": 2.512555391432792,
      "grad_norm": 0.3072627782821655,
      "learning_rate": 3.251631974381081e-05,
      "loss": 0.172,
      "step": 6804
    },
    {
      "epoch": 2.5129246676514034,
      "grad_norm": 0.2833667993545532,
      "learning_rate": 3.249168616824732e-05,
      "loss": 0.1767,
      "step": 6805
    },
    {
      "epoch": 2.513293943870015,
      "grad_norm": 0.26352235674858093,
      "learning_rate": 3.246705259268383e-05,
      "loss": 0.167,
      "step": 6806
    },
    {
      "epoch": 2.513663220088626,
      "grad_norm": 0.2721315920352936,
      "learning_rate": 3.2442419017120337e-05,
      "loss": 0.1652,
      "step": 6807
    },
    {
      "epoch": 2.514032496307238,
      "grad_norm": 0.25515738129615784,
      "learning_rate": 3.2417785441556844e-05,
      "loss": 0.1619,
      "step": 6808
    },
    {
      "epoch": 2.5144017725258494,
      "grad_norm": 0.2895338535308838,
      "learning_rate": 3.2393151865993346e-05,
      "loss": 0.2061,
      "step": 6809
    },
    {
      "epoch": 2.514771048744461,
      "grad_norm": 0.2826431393623352,
      "learning_rate": 3.236851829042985e-05,
      "loss": 0.1617,
      "step": 6810
    },
    {
      "epoch": 2.515140324963072,
      "grad_norm": 0.24174825847148895,
      "learning_rate": 3.234388471486636e-05,
      "loss": 0.1544,
      "step": 6811
    },
    {
      "epoch": 2.5155096011816838,
      "grad_norm": 0.23029516637325287,
      "learning_rate": 3.231925113930287e-05,
      "loss": 0.1458,
      "step": 6812
    },
    {
      "epoch": 2.5158788774002954,
      "grad_norm": 0.2716422975063324,
      "learning_rate": 3.229461756373938e-05,
      "loss": 0.141,
      "step": 6813
    },
    {
      "epoch": 2.516248153618907,
      "grad_norm": 0.2675827443599701,
      "learning_rate": 3.2269983988175885e-05,
      "loss": 0.1579,
      "step": 6814
    },
    {
      "epoch": 2.5166174298375186,
      "grad_norm": 0.24630804359912872,
      "learning_rate": 3.224535041261239e-05,
      "loss": 0.1351,
      "step": 6815
    },
    {
      "epoch": 2.51698670605613,
      "grad_norm": 0.2974760830402374,
      "learning_rate": 3.22207168370489e-05,
      "loss": 0.1833,
      "step": 6816
    },
    {
      "epoch": 2.5173559822747418,
      "grad_norm": 0.26326414942741394,
      "learning_rate": 3.21960832614854e-05,
      "loss": 0.1732,
      "step": 6817
    },
    {
      "epoch": 2.517725258493353,
      "grad_norm": 0.2742905616760254,
      "learning_rate": 3.217144968592191e-05,
      "loss": 0.1999,
      "step": 6818
    },
    {
      "epoch": 2.5180945347119645,
      "grad_norm": 0.26032716035842896,
      "learning_rate": 3.214681611035842e-05,
      "loss": 0.1575,
      "step": 6819
    },
    {
      "epoch": 2.518463810930576,
      "grad_norm": 0.2454797774553299,
      "learning_rate": 3.2122182534794925e-05,
      "loss": 0.1503,
      "step": 6820
    },
    {
      "epoch": 2.5188330871491877,
      "grad_norm": 0.2847702205181122,
      "learning_rate": 3.209754895923143e-05,
      "loss": 0.2125,
      "step": 6821
    },
    {
      "epoch": 2.519202363367799,
      "grad_norm": 0.24653670191764832,
      "learning_rate": 3.207291538366794e-05,
      "loss": 0.1612,
      "step": 6822
    },
    {
      "epoch": 2.5195716395864105,
      "grad_norm": 0.2155052125453949,
      "learning_rate": 3.204828180810445e-05,
      "loss": 0.166,
      "step": 6823
    },
    {
      "epoch": 2.519940915805022,
      "grad_norm": 0.2415972352027893,
      "learning_rate": 3.202364823254096e-05,
      "loss": 0.1758,
      "step": 6824
    },
    {
      "epoch": 2.5203101920236337,
      "grad_norm": 0.26860833168029785,
      "learning_rate": 3.199901465697746e-05,
      "loss": 0.164,
      "step": 6825
    },
    {
      "epoch": 2.5206794682422453,
      "grad_norm": 0.3211625814437866,
      "learning_rate": 3.1974381081413966e-05,
      "loss": 0.1647,
      "step": 6826
    },
    {
      "epoch": 2.521048744460857,
      "grad_norm": 0.21640796959400177,
      "learning_rate": 3.1949747505850474e-05,
      "loss": 0.1677,
      "step": 6827
    },
    {
      "epoch": 2.5214180206794685,
      "grad_norm": 0.23410780727863312,
      "learning_rate": 3.192511393028698e-05,
      "loss": 0.1508,
      "step": 6828
    },
    {
      "epoch": 2.5217872968980797,
      "grad_norm": 0.28496286273002625,
      "learning_rate": 3.190048035472349e-05,
      "loss": 0.1855,
      "step": 6829
    },
    {
      "epoch": 2.5221565731166913,
      "grad_norm": 0.23215197026729584,
      "learning_rate": 3.187584677916e-05,
      "loss": 0.1513,
      "step": 6830
    },
    {
      "epoch": 2.522525849335303,
      "grad_norm": 0.27465683221817017,
      "learning_rate": 3.1851213203596505e-05,
      "loss": 0.1772,
      "step": 6831
    },
    {
      "epoch": 2.5228951255539145,
      "grad_norm": 0.28835806250572205,
      "learning_rate": 3.182657962803301e-05,
      "loss": 0.162,
      "step": 6832
    },
    {
      "epoch": 2.5232644017725256,
      "grad_norm": 0.24527624249458313,
      "learning_rate": 3.1801946052469514e-05,
      "loss": 0.1605,
      "step": 6833
    },
    {
      "epoch": 2.5236336779911372,
      "grad_norm": 0.30247774720191956,
      "learning_rate": 3.177731247690602e-05,
      "loss": 0.1915,
      "step": 6834
    },
    {
      "epoch": 2.524002954209749,
      "grad_norm": 0.25916823744773865,
      "learning_rate": 3.175267890134253e-05,
      "loss": 0.1831,
      "step": 6835
    },
    {
      "epoch": 2.5243722304283605,
      "grad_norm": 0.31635192036628723,
      "learning_rate": 3.172804532577904e-05,
      "loss": 0.1859,
      "step": 6836
    },
    {
      "epoch": 2.524741506646972,
      "grad_norm": 0.2717903256416321,
      "learning_rate": 3.1703411750215545e-05,
      "loss": 0.1466,
      "step": 6837
    },
    {
      "epoch": 2.5251107828655837,
      "grad_norm": 0.2232266366481781,
      "learning_rate": 3.167877817465205e-05,
      "loss": 0.1437,
      "step": 6838
    },
    {
      "epoch": 2.525480059084195,
      "grad_norm": 0.2692876160144806,
      "learning_rate": 3.165414459908856e-05,
      "loss": 0.1638,
      "step": 6839
    },
    {
      "epoch": 2.5258493353028064,
      "grad_norm": 0.2996908128261566,
      "learning_rate": 3.162951102352507e-05,
      "loss": 0.1624,
      "step": 6840
    },
    {
      "epoch": 2.526218611521418,
      "grad_norm": 0.27915018796920776,
      "learning_rate": 3.160487744796157e-05,
      "loss": 0.1588,
      "step": 6841
    },
    {
      "epoch": 2.5265878877400296,
      "grad_norm": 0.25712302327156067,
      "learning_rate": 3.158024387239808e-05,
      "loss": 0.1813,
      "step": 6842
    },
    {
      "epoch": 2.5269571639586412,
      "grad_norm": 0.2830292582511902,
      "learning_rate": 3.1555610296834586e-05,
      "loss": 0.2051,
      "step": 6843
    },
    {
      "epoch": 2.5273264401772524,
      "grad_norm": 0.31948432326316833,
      "learning_rate": 3.1530976721271094e-05,
      "loss": 0.1748,
      "step": 6844
    },
    {
      "epoch": 2.527695716395864,
      "grad_norm": 0.279136598110199,
      "learning_rate": 3.15063431457076e-05,
      "loss": 0.1643,
      "step": 6845
    },
    {
      "epoch": 2.5280649926144756,
      "grad_norm": 0.284660279750824,
      "learning_rate": 3.148170957014411e-05,
      "loss": 0.166,
      "step": 6846
    },
    {
      "epoch": 2.528434268833087,
      "grad_norm": 0.3152167499065399,
      "learning_rate": 3.145707599458062e-05,
      "loss": 0.16,
      "step": 6847
    },
    {
      "epoch": 2.528803545051699,
      "grad_norm": 0.26597416400909424,
      "learning_rate": 3.1432442419017125e-05,
      "loss": 0.189,
      "step": 6848
    },
    {
      "epoch": 2.5291728212703104,
      "grad_norm": 0.2872331738471985,
      "learning_rate": 3.1407808843453626e-05,
      "loss": 0.1528,
      "step": 6849
    },
    {
      "epoch": 2.5295420974889216,
      "grad_norm": 0.2678552567958832,
      "learning_rate": 3.1383175267890134e-05,
      "loss": 0.1842,
      "step": 6850
    },
    {
      "epoch": 2.5295420974889216,
      "eval_loss": 0.2541038990020752,
      "eval_runtime": 5.8479,
      "eval_samples_per_second": 8.55,
      "eval_steps_per_second": 1.197,
      "step": 6850
    },
    {
      "epoch": 2.529911373707533,
      "grad_norm": 0.2524714469909668,
      "learning_rate": 3.135854169232664e-05,
      "loss": 0.1701,
      "step": 6851
    },
    {
      "epoch": 2.5302806499261448,
      "grad_norm": 0.3068491220474243,
      "learning_rate": 3.133390811676315e-05,
      "loss": 0.1635,
      "step": 6852
    },
    {
      "epoch": 2.5306499261447564,
      "grad_norm": 0.30152344703674316,
      "learning_rate": 3.130927454119966e-05,
      "loss": 0.162,
      "step": 6853
    },
    {
      "epoch": 2.5310192023633675,
      "grad_norm": 0.2503848671913147,
      "learning_rate": 3.1284640965636165e-05,
      "loss": 0.1773,
      "step": 6854
    },
    {
      "epoch": 2.531388478581979,
      "grad_norm": 0.281986266374588,
      "learning_rate": 3.126000739007267e-05,
      "loss": 0.1672,
      "step": 6855
    },
    {
      "epoch": 2.5317577548005907,
      "grad_norm": 0.29041624069213867,
      "learning_rate": 3.123537381450918e-05,
      "loss": 0.1654,
      "step": 6856
    },
    {
      "epoch": 2.5321270310192023,
      "grad_norm": 0.28015342354774475,
      "learning_rate": 3.121074023894568e-05,
      "loss": 0.1472,
      "step": 6857
    },
    {
      "epoch": 2.532496307237814,
      "grad_norm": 0.27722394466400146,
      "learning_rate": 3.118610666338219e-05,
      "loss": 0.1665,
      "step": 6858
    },
    {
      "epoch": 2.5328655834564255,
      "grad_norm": 0.29715755581855774,
      "learning_rate": 3.11614730878187e-05,
      "loss": 0.17,
      "step": 6859
    },
    {
      "epoch": 2.533234859675037,
      "grad_norm": 0.3651770353317261,
      "learning_rate": 3.1136839512255206e-05,
      "loss": 0.1896,
      "step": 6860
    },
    {
      "epoch": 2.5336041358936483,
      "grad_norm": 0.2718464136123657,
      "learning_rate": 3.1112205936691714e-05,
      "loss": 0.1653,
      "step": 6861
    },
    {
      "epoch": 2.53397341211226,
      "grad_norm": 0.22726774215698242,
      "learning_rate": 3.108757236112822e-05,
      "loss": 0.1566,
      "step": 6862
    },
    {
      "epoch": 2.5343426883308715,
      "grad_norm": 0.2652910053730011,
      "learning_rate": 3.106293878556473e-05,
      "loss": 0.1467,
      "step": 6863
    },
    {
      "epoch": 2.534711964549483,
      "grad_norm": 0.22904013097286224,
      "learning_rate": 3.103830521000123e-05,
      "loss": 0.1445,
      "step": 6864
    },
    {
      "epoch": 2.5350812407680943,
      "grad_norm": 0.33844199776649475,
      "learning_rate": 3.101367163443774e-05,
      "loss": 0.1885,
      "step": 6865
    },
    {
      "epoch": 2.535450516986706,
      "grad_norm": 0.24840067327022552,
      "learning_rate": 3.0989038058874246e-05,
      "loss": 0.1435,
      "step": 6866
    },
    {
      "epoch": 2.5358197932053175,
      "grad_norm": 0.31841030716896057,
      "learning_rate": 3.0964404483310754e-05,
      "loss": 0.1425,
      "step": 6867
    },
    {
      "epoch": 2.536189069423929,
      "grad_norm": 0.2922656536102295,
      "learning_rate": 3.093977090774726e-05,
      "loss": 0.1536,
      "step": 6868
    },
    {
      "epoch": 2.5365583456425407,
      "grad_norm": 0.318960577249527,
      "learning_rate": 3.091513733218377e-05,
      "loss": 0.1776,
      "step": 6869
    },
    {
      "epoch": 2.5369276218611523,
      "grad_norm": 0.2846389710903168,
      "learning_rate": 3.089050375662028e-05,
      "loss": 0.1624,
      "step": 6870
    },
    {
      "epoch": 2.537296898079764,
      "grad_norm": 0.30565300583839417,
      "learning_rate": 3.0865870181056786e-05,
      "loss": 0.1778,
      "step": 6871
    },
    {
      "epoch": 2.537666174298375,
      "grad_norm": 0.24448475241661072,
      "learning_rate": 3.084123660549329e-05,
      "loss": 0.1628,
      "step": 6872
    },
    {
      "epoch": 2.5380354505169866,
      "grad_norm": 0.30311644077301025,
      "learning_rate": 3.0816603029929795e-05,
      "loss": 0.1563,
      "step": 6873
    },
    {
      "epoch": 2.5384047267355982,
      "grad_norm": 0.2623513340950012,
      "learning_rate": 3.07919694543663e-05,
      "loss": 0.1935,
      "step": 6874
    },
    {
      "epoch": 2.53877400295421,
      "grad_norm": 0.2626889646053314,
      "learning_rate": 3.076733587880281e-05,
      "loss": 0.1687,
      "step": 6875
    },
    {
      "epoch": 2.539143279172821,
      "grad_norm": 0.3092837333679199,
      "learning_rate": 3.074270230323932e-05,
      "loss": 0.1792,
      "step": 6876
    },
    {
      "epoch": 2.5395125553914326,
      "grad_norm": 0.30462512373924255,
      "learning_rate": 3.0718068727675826e-05,
      "loss": 0.1886,
      "step": 6877
    },
    {
      "epoch": 2.539881831610044,
      "grad_norm": 0.3408518135547638,
      "learning_rate": 3.0693435152112334e-05,
      "loss": 0.1922,
      "step": 6878
    },
    {
      "epoch": 2.540251107828656,
      "grad_norm": 0.2713509798049927,
      "learning_rate": 3.066880157654884e-05,
      "loss": 0.1636,
      "step": 6879
    },
    {
      "epoch": 2.5406203840472674,
      "grad_norm": 0.31693035364151,
      "learning_rate": 3.064416800098534e-05,
      "loss": 0.1506,
      "step": 6880
    },
    {
      "epoch": 2.540989660265879,
      "grad_norm": 0.25451111793518066,
      "learning_rate": 3.061953442542185e-05,
      "loss": 0.1571,
      "step": 6881
    },
    {
      "epoch": 2.5413589364844906,
      "grad_norm": 0.2906498610973358,
      "learning_rate": 3.059490084985836e-05,
      "loss": 0.156,
      "step": 6882
    },
    {
      "epoch": 2.541728212703102,
      "grad_norm": 0.30169451236724854,
      "learning_rate": 3.0570267274294866e-05,
      "loss": 0.1458,
      "step": 6883
    },
    {
      "epoch": 2.5420974889217134,
      "grad_norm": 0.41555091738700867,
      "learning_rate": 3.0545633698731374e-05,
      "loss": 0.1743,
      "step": 6884
    },
    {
      "epoch": 2.542466765140325,
      "grad_norm": 0.2576081156730652,
      "learning_rate": 3.052100012316788e-05,
      "loss": 0.1802,
      "step": 6885
    },
    {
      "epoch": 2.5428360413589366,
      "grad_norm": 0.2756846249103546,
      "learning_rate": 3.0496366547604387e-05,
      "loss": 0.149,
      "step": 6886
    },
    {
      "epoch": 2.5432053175775478,
      "grad_norm": 0.30317094922065735,
      "learning_rate": 3.0471732972040894e-05,
      "loss": 0.162,
      "step": 6887
    },
    {
      "epoch": 2.5435745937961594,
      "grad_norm": 0.2918936014175415,
      "learning_rate": 3.0447099396477402e-05,
      "loss": 0.1668,
      "step": 6888
    },
    {
      "epoch": 2.543943870014771,
      "grad_norm": 0.35052746534347534,
      "learning_rate": 3.042246582091391e-05,
      "loss": 0.1899,
      "step": 6889
    },
    {
      "epoch": 2.5443131462333826,
      "grad_norm": 0.24106165766716003,
      "learning_rate": 3.0397832245350415e-05,
      "loss": 0.1453,
      "step": 6890
    },
    {
      "epoch": 2.544682422451994,
      "grad_norm": 0.3153793513774872,
      "learning_rate": 3.0373198669786923e-05,
      "loss": 0.1972,
      "step": 6891
    },
    {
      "epoch": 2.5450516986706058,
      "grad_norm": 0.2803950309753418,
      "learning_rate": 3.034856509422343e-05,
      "loss": 0.1575,
      "step": 6892
    },
    {
      "epoch": 2.5454209748892174,
      "grad_norm": 0.2646368145942688,
      "learning_rate": 3.0323931518659938e-05,
      "loss": 0.1513,
      "step": 6893
    },
    {
      "epoch": 2.5457902511078285,
      "grad_norm": 0.257588654756546,
      "learning_rate": 3.0299297943096443e-05,
      "loss": 0.181,
      "step": 6894
    },
    {
      "epoch": 2.54615952732644,
      "grad_norm": 0.26178112626075745,
      "learning_rate": 3.027466436753295e-05,
      "loss": 0.1427,
      "step": 6895
    },
    {
      "epoch": 2.5465288035450517,
      "grad_norm": 0.24951210618019104,
      "learning_rate": 3.025003079196946e-05,
      "loss": 0.1484,
      "step": 6896
    },
    {
      "epoch": 2.5468980797636633,
      "grad_norm": 0.27104729413986206,
      "learning_rate": 3.0225397216405966e-05,
      "loss": 0.1462,
      "step": 6897
    },
    {
      "epoch": 2.5472673559822745,
      "grad_norm": 0.31783542037010193,
      "learning_rate": 3.020076364084247e-05,
      "loss": 0.177,
      "step": 6898
    },
    {
      "epoch": 2.547636632200886,
      "grad_norm": 0.29903313517570496,
      "learning_rate": 3.017613006527898e-05,
      "loss": 0.17,
      "step": 6899
    },
    {
      "epoch": 2.5480059084194977,
      "grad_norm": 0.28590208292007446,
      "learning_rate": 3.0151496489715487e-05,
      "loss": 0.1775,
      "step": 6900
    },
    {
      "epoch": 2.5480059084194977,
      "eval_loss": 0.2523267865180969,
      "eval_runtime": 5.8514,
      "eval_samples_per_second": 8.545,
      "eval_steps_per_second": 1.196,
      "step": 6900
    },
    {
      "epoch": 2.5483751846381093,
      "grad_norm": 0.28440597653388977,
      "learning_rate": 3.0126862914151994e-05,
      "loss": 0.174,
      "step": 6901
    },
    {
      "epoch": 2.548744460856721,
      "grad_norm": 0.2633342742919922,
      "learning_rate": 3.01022293385885e-05,
      "loss": 0.1592,
      "step": 6902
    },
    {
      "epoch": 2.5491137370753325,
      "grad_norm": 0.3877559304237366,
      "learning_rate": 3.0077595763025007e-05,
      "loss": 0.1874,
      "step": 6903
    },
    {
      "epoch": 2.549483013293944,
      "grad_norm": 0.2826557755470276,
      "learning_rate": 3.0052962187461515e-05,
      "loss": 0.1771,
      "step": 6904
    },
    {
      "epoch": 2.5498522895125553,
      "grad_norm": 0.2666883170604706,
      "learning_rate": 3.0028328611898022e-05,
      "loss": 0.1739,
      "step": 6905
    },
    {
      "epoch": 2.550221565731167,
      "grad_norm": 0.26083993911743164,
      "learning_rate": 3.0003695036334527e-05,
      "loss": 0.1679,
      "step": 6906
    },
    {
      "epoch": 2.5505908419497785,
      "grad_norm": 0.2829050123691559,
      "learning_rate": 2.9979061460771028e-05,
      "loss": 0.1608,
      "step": 6907
    },
    {
      "epoch": 2.55096011816839,
      "grad_norm": 0.2269107848405838,
      "learning_rate": 2.9954427885207536e-05,
      "loss": 0.1648,
      "step": 6908
    },
    {
      "epoch": 2.5513293943870012,
      "grad_norm": 0.35450270771980286,
      "learning_rate": 2.9929794309644044e-05,
      "loss": 0.1604,
      "step": 6909
    },
    {
      "epoch": 2.551698670605613,
      "grad_norm": 0.6864081621170044,
      "learning_rate": 2.990516073408055e-05,
      "loss": 0.1959,
      "step": 6910
    },
    {
      "epoch": 2.5520679468242244,
      "grad_norm": 0.2551758289337158,
      "learning_rate": 2.9880527158517056e-05,
      "loss": 0.1619,
      "step": 6911
    },
    {
      "epoch": 2.552437223042836,
      "grad_norm": 0.3041122853755951,
      "learning_rate": 2.9855893582953564e-05,
      "loss": 0.164,
      "step": 6912
    },
    {
      "epoch": 2.5528064992614476,
      "grad_norm": 0.2321958690881729,
      "learning_rate": 2.9831260007390072e-05,
      "loss": 0.1592,
      "step": 6913
    },
    {
      "epoch": 2.5531757754800593,
      "grad_norm": 0.23633337020874023,
      "learning_rate": 2.980662643182658e-05,
      "loss": 0.1605,
      "step": 6914
    },
    {
      "epoch": 2.553545051698671,
      "grad_norm": 0.3471091389656067,
      "learning_rate": 2.9781992856263084e-05,
      "loss": 0.1808,
      "step": 6915
    },
    {
      "epoch": 2.553914327917282,
      "grad_norm": 0.25309813022613525,
      "learning_rate": 2.9757359280699592e-05,
      "loss": 0.1688,
      "step": 6916
    },
    {
      "epoch": 2.5542836041358936,
      "grad_norm": 0.2922281324863434,
      "learning_rate": 2.97327257051361e-05,
      "loss": 0.171,
      "step": 6917
    },
    {
      "epoch": 2.554652880354505,
      "grad_norm": 0.3181562125682831,
      "learning_rate": 2.9708092129572608e-05,
      "loss": 0.1846,
      "step": 6918
    },
    {
      "epoch": 2.555022156573117,
      "grad_norm": 0.23554129898548126,
      "learning_rate": 2.9683458554009112e-05,
      "loss": 0.1368,
      "step": 6919
    },
    {
      "epoch": 2.555391432791728,
      "grad_norm": 0.24824005365371704,
      "learning_rate": 2.965882497844562e-05,
      "loss": 0.1592,
      "step": 6920
    },
    {
      "epoch": 2.5557607090103396,
      "grad_norm": 0.34159722924232483,
      "learning_rate": 2.9634191402882128e-05,
      "loss": 0.1719,
      "step": 6921
    },
    {
      "epoch": 2.556129985228951,
      "grad_norm": 0.278098464012146,
      "learning_rate": 2.9609557827318636e-05,
      "loss": 0.1653,
      "step": 6922
    },
    {
      "epoch": 2.556499261447563,
      "grad_norm": 0.21666814386844635,
      "learning_rate": 2.958492425175514e-05,
      "loss": 0.1458,
      "step": 6923
    },
    {
      "epoch": 2.5568685376661744,
      "grad_norm": 0.26115965843200684,
      "learning_rate": 2.9560290676191648e-05,
      "loss": 0.1719,
      "step": 6924
    },
    {
      "epoch": 2.557237813884786,
      "grad_norm": 0.2852897346019745,
      "learning_rate": 2.9535657100628156e-05,
      "loss": 0.149,
      "step": 6925
    },
    {
      "epoch": 2.5576070901033976,
      "grad_norm": 0.27047446370124817,
      "learning_rate": 2.9511023525064664e-05,
      "loss": 0.182,
      "step": 6926
    },
    {
      "epoch": 2.5579763663220088,
      "grad_norm": 0.2714018225669861,
      "learning_rate": 2.948638994950117e-05,
      "loss": 0.1663,
      "step": 6927
    },
    {
      "epoch": 2.5583456425406204,
      "grad_norm": 0.3140382170677185,
      "learning_rate": 2.9461756373937676e-05,
      "loss": 0.1656,
      "step": 6928
    },
    {
      "epoch": 2.558714918759232,
      "grad_norm": 0.3167521357536316,
      "learning_rate": 2.9437122798374184e-05,
      "loss": 0.1816,
      "step": 6929
    },
    {
      "epoch": 2.5590841949778436,
      "grad_norm": 0.24346637725830078,
      "learning_rate": 2.9412489222810692e-05,
      "loss": 0.1469,
      "step": 6930
    },
    {
      "epoch": 2.5594534711964547,
      "grad_norm": 0.28615519404411316,
      "learning_rate": 2.9387855647247196e-05,
      "loss": 0.1812,
      "step": 6931
    },
    {
      "epoch": 2.5598227474150663,
      "grad_norm": 0.2468711882829666,
      "learning_rate": 2.9363222071683704e-05,
      "loss": 0.1302,
      "step": 6932
    },
    {
      "epoch": 2.560192023633678,
      "grad_norm": 0.27801457047462463,
      "learning_rate": 2.9338588496120212e-05,
      "loss": 0.1717,
      "step": 6933
    },
    {
      "epoch": 2.5605612998522895,
      "grad_norm": 0.3109159469604492,
      "learning_rate": 2.931395492055672e-05,
      "loss": 0.1731,
      "step": 6934
    },
    {
      "epoch": 2.560930576070901,
      "grad_norm": 0.24855497479438782,
      "learning_rate": 2.9289321344993224e-05,
      "loss": 0.1478,
      "step": 6935
    },
    {
      "epoch": 2.5612998522895127,
      "grad_norm": 0.2573830485343933,
      "learning_rate": 2.9264687769429732e-05,
      "loss": 0.1615,
      "step": 6936
    },
    {
      "epoch": 2.5616691285081243,
      "grad_norm": 0.21040625870227814,
      "learning_rate": 2.924005419386624e-05,
      "loss": 0.1706,
      "step": 6937
    },
    {
      "epoch": 2.5620384047267355,
      "grad_norm": 0.3306921124458313,
      "learning_rate": 2.9215420618302748e-05,
      "loss": 0.1848,
      "step": 6938
    },
    {
      "epoch": 2.562407680945347,
      "grad_norm": 0.3369297981262207,
      "learning_rate": 2.9190787042739253e-05,
      "loss": 0.1613,
      "step": 6939
    },
    {
      "epoch": 2.5627769571639587,
      "grad_norm": 0.2304317057132721,
      "learning_rate": 2.916615346717576e-05,
      "loss": 0.1691,
      "step": 6940
    },
    {
      "epoch": 2.5631462333825703,
      "grad_norm": 0.26104792952537537,
      "learning_rate": 2.9141519891612268e-05,
      "loss": 0.1739,
      "step": 6941
    },
    {
      "epoch": 2.5635155096011815,
      "grad_norm": 0.25000765919685364,
      "learning_rate": 2.9116886316048776e-05,
      "loss": 0.1554,
      "step": 6942
    },
    {
      "epoch": 2.563884785819793,
      "grad_norm": 0.2503049075603485,
      "learning_rate": 2.909225274048528e-05,
      "loss": 0.1479,
      "step": 6943
    },
    {
      "epoch": 2.5642540620384047,
      "grad_norm": 0.31189024448394775,
      "learning_rate": 2.906761916492179e-05,
      "loss": 0.1662,
      "step": 6944
    },
    {
      "epoch": 2.5646233382570163,
      "grad_norm": 0.2347639799118042,
      "learning_rate": 2.9042985589358296e-05,
      "loss": 0.158,
      "step": 6945
    },
    {
      "epoch": 2.564992614475628,
      "grad_norm": 0.24234919250011444,
      "learning_rate": 2.9018352013794804e-05,
      "loss": 0.1726,
      "step": 6946
    },
    {
      "epoch": 2.5653618906942395,
      "grad_norm": 0.2983647584915161,
      "learning_rate": 2.899371843823131e-05,
      "loss": 0.162,
      "step": 6947
    },
    {
      "epoch": 2.565731166912851,
      "grad_norm": 0.2836242616176605,
      "learning_rate": 2.8969084862667817e-05,
      "loss": 0.1636,
      "step": 6948
    },
    {
      "epoch": 2.5661004431314622,
      "grad_norm": 0.2699525058269501,
      "learning_rate": 2.8944451287104324e-05,
      "loss": 0.1683,
      "step": 6949
    },
    {
      "epoch": 2.566469719350074,
      "grad_norm": 0.32303038239479065,
      "learning_rate": 2.8919817711540832e-05,
      "loss": 0.1778,
      "step": 6950
    },
    {
      "epoch": 2.566469719350074,
      "eval_loss": 0.2497435361146927,
      "eval_runtime": 5.8575,
      "eval_samples_per_second": 8.536,
      "eval_steps_per_second": 1.195,
      "step": 6950
    },
    {
      "epoch": 2.5668389955686854,
      "grad_norm": 0.2764846384525299,
      "learning_rate": 2.8895184135977337e-05,
      "loss": 0.1607,
      "step": 6951
    },
    {
      "epoch": 2.567208271787297,
      "grad_norm": 0.2732771039009094,
      "learning_rate": 2.8870550560413845e-05,
      "loss": 0.1574,
      "step": 6952
    },
    {
      "epoch": 2.567577548005908,
      "grad_norm": 0.31445491313934326,
      "learning_rate": 2.8845916984850352e-05,
      "loss": 0.1827,
      "step": 6953
    },
    {
      "epoch": 2.56794682422452,
      "grad_norm": 0.24035432934761047,
      "learning_rate": 2.882128340928686e-05,
      "loss": 0.1553,
      "step": 6954
    },
    {
      "epoch": 2.5683161004431314,
      "grad_norm": 0.2663872539997101,
      "learning_rate": 2.8796649833723365e-05,
      "loss": 0.1644,
      "step": 6955
    },
    {
      "epoch": 2.568685376661743,
      "grad_norm": 0.27982500195503235,
      "learning_rate": 2.8772016258159873e-05,
      "loss": 0.1553,
      "step": 6956
    },
    {
      "epoch": 2.5690546528803546,
      "grad_norm": 0.5572454333305359,
      "learning_rate": 2.874738268259638e-05,
      "loss": 0.1526,
      "step": 6957
    },
    {
      "epoch": 2.569423929098966,
      "grad_norm": 0.2863433361053467,
      "learning_rate": 2.872274910703289e-05,
      "loss": 0.1588,
      "step": 6958
    },
    {
      "epoch": 2.569793205317578,
      "grad_norm": 0.3161817491054535,
      "learning_rate": 2.8698115531469393e-05,
      "loss": 0.222,
      "step": 6959
    },
    {
      "epoch": 2.570162481536189,
      "grad_norm": 0.28113853931427,
      "learning_rate": 2.86734819559059e-05,
      "loss": 0.1817,
      "step": 6960
    },
    {
      "epoch": 2.5705317577548006,
      "grad_norm": 0.23445942997932434,
      "learning_rate": 2.864884838034241e-05,
      "loss": 0.1497,
      "step": 6961
    },
    {
      "epoch": 2.570901033973412,
      "grad_norm": 0.3076701760292053,
      "learning_rate": 2.8624214804778916e-05,
      "loss": 0.1515,
      "step": 6962
    },
    {
      "epoch": 2.571270310192024,
      "grad_norm": 0.26474785804748535,
      "learning_rate": 2.859958122921542e-05,
      "loss": 0.1587,
      "step": 6963
    },
    {
      "epoch": 2.571639586410635,
      "grad_norm": 0.3620382249355316,
      "learning_rate": 2.857494765365193e-05,
      "loss": 0.1699,
      "step": 6964
    },
    {
      "epoch": 2.5720088626292466,
      "grad_norm": 0.2817908227443695,
      "learning_rate": 2.8550314078088437e-05,
      "loss": 0.1565,
      "step": 6965
    },
    {
      "epoch": 2.572378138847858,
      "grad_norm": 0.27126121520996094,
      "learning_rate": 2.8525680502524945e-05,
      "loss": 0.174,
      "step": 6966
    },
    {
      "epoch": 2.5727474150664698,
      "grad_norm": 0.27937519550323486,
      "learning_rate": 2.850104692696145e-05,
      "loss": 0.1773,
      "step": 6967
    },
    {
      "epoch": 2.5731166912850814,
      "grad_norm": 0.24077384173870087,
      "learning_rate": 2.8476413351397957e-05,
      "loss": 0.1395,
      "step": 6968
    },
    {
      "epoch": 2.573485967503693,
      "grad_norm": 0.26432493329048157,
      "learning_rate": 2.8451779775834465e-05,
      "loss": 0.177,
      "step": 6969
    },
    {
      "epoch": 2.573855243722304,
      "grad_norm": 0.2882770299911499,
      "learning_rate": 2.8427146200270973e-05,
      "loss": 0.17,
      "step": 6970
    },
    {
      "epoch": 2.5742245199409157,
      "grad_norm": 0.24378398060798645,
      "learning_rate": 2.8402512624707477e-05,
      "loss": 0.1655,
      "step": 6971
    },
    {
      "epoch": 2.5745937961595273,
      "grad_norm": 0.27990424633026123,
      "learning_rate": 2.8377879049143985e-05,
      "loss": 0.1644,
      "step": 6972
    },
    {
      "epoch": 2.574963072378139,
      "grad_norm": 0.258092999458313,
      "learning_rate": 2.8353245473580493e-05,
      "loss": 0.1562,
      "step": 6973
    },
    {
      "epoch": 2.5753323485967505,
      "grad_norm": 0.2670189142227173,
      "learning_rate": 2.8328611898017e-05,
      "loss": 0.1574,
      "step": 6974
    },
    {
      "epoch": 2.5757016248153617,
      "grad_norm": 0.23461149632930756,
      "learning_rate": 2.8303978322453505e-05,
      "loss": 0.1608,
      "step": 6975
    },
    {
      "epoch": 2.5760709010339733,
      "grad_norm": 0.2981928884983063,
      "learning_rate": 2.8279344746890013e-05,
      "loss": 0.1704,
      "step": 6976
    },
    {
      "epoch": 2.576440177252585,
      "grad_norm": 0.27135443687438965,
      "learning_rate": 2.825471117132652e-05,
      "loss": 0.1626,
      "step": 6977
    },
    {
      "epoch": 2.5768094534711965,
      "grad_norm": 0.25236204266548157,
      "learning_rate": 2.8230077595763025e-05,
      "loss": 0.1479,
      "step": 6978
    },
    {
      "epoch": 2.577178729689808,
      "grad_norm": 0.26014214754104614,
      "learning_rate": 2.8205444020199533e-05,
      "loss": 0.156,
      "step": 6979
    },
    {
      "epoch": 2.5775480059084197,
      "grad_norm": 0.3150984048843384,
      "learning_rate": 2.818081044463604e-05,
      "loss": 0.1758,
      "step": 6980
    },
    {
      "epoch": 2.577917282127031,
      "grad_norm": 0.31969812512397766,
      "learning_rate": 2.815617686907255e-05,
      "loss": 0.1625,
      "step": 6981
    },
    {
      "epoch": 2.5782865583456425,
      "grad_norm": 0.25959834456443787,
      "learning_rate": 2.8131543293509053e-05,
      "loss": 0.1536,
      "step": 6982
    },
    {
      "epoch": 2.578655834564254,
      "grad_norm": 0.29799771308898926,
      "learning_rate": 2.810690971794556e-05,
      "loss": 0.1511,
      "step": 6983
    },
    {
      "epoch": 2.5790251107828657,
      "grad_norm": 0.28962835669517517,
      "learning_rate": 2.808227614238207e-05,
      "loss": 0.1573,
      "step": 6984
    },
    {
      "epoch": 2.579394387001477,
      "grad_norm": 0.25530558824539185,
      "learning_rate": 2.8057642566818577e-05,
      "loss": 0.1471,
      "step": 6985
    },
    {
      "epoch": 2.5797636632200884,
      "grad_norm": 0.24739280343055725,
      "learning_rate": 2.803300899125508e-05,
      "loss": 0.15,
      "step": 6986
    },
    {
      "epoch": 2.5801329394387,
      "grad_norm": 0.3226220905780792,
      "learning_rate": 2.800837541569159e-05,
      "loss": 0.1695,
      "step": 6987
    },
    {
      "epoch": 2.5805022156573116,
      "grad_norm": 0.2757609188556671,
      "learning_rate": 2.7983741840128097e-05,
      "loss": 0.1619,
      "step": 6988
    },
    {
      "epoch": 2.5808714918759232,
      "grad_norm": 0.3205515146255493,
      "learning_rate": 2.7959108264564605e-05,
      "loss": 0.1998,
      "step": 6989
    },
    {
      "epoch": 2.581240768094535,
      "grad_norm": 0.29517224431037903,
      "learning_rate": 2.793447468900111e-05,
      "loss": 0.1701,
      "step": 6990
    },
    {
      "epoch": 2.5816100443131464,
      "grad_norm": 0.2605873942375183,
      "learning_rate": 2.7909841113437617e-05,
      "loss": 0.1599,
      "step": 6991
    },
    {
      "epoch": 2.5819793205317576,
      "grad_norm": 0.3558996915817261,
      "learning_rate": 2.7885207537874125e-05,
      "loss": 0.1546,
      "step": 6992
    },
    {
      "epoch": 2.582348596750369,
      "grad_norm": 0.3165871500968933,
      "learning_rate": 2.7860573962310633e-05,
      "loss": 0.1983,
      "step": 6993
    },
    {
      "epoch": 2.582717872968981,
      "grad_norm": 0.264943391084671,
      "learning_rate": 2.7835940386747138e-05,
      "loss": 0.1712,
      "step": 6994
    },
    {
      "epoch": 2.5830871491875924,
      "grad_norm": 0.24953593313694,
      "learning_rate": 2.7811306811183645e-05,
      "loss": 0.1604,
      "step": 6995
    },
    {
      "epoch": 2.5834564254062036,
      "grad_norm": 0.2861347794532776,
      "learning_rate": 2.7786673235620153e-05,
      "loss": 0.1704,
      "step": 6996
    },
    {
      "epoch": 2.583825701624815,
      "grad_norm": 0.2639878988265991,
      "learning_rate": 2.776203966005666e-05,
      "loss": 0.1493,
      "step": 6997
    },
    {
      "epoch": 2.5841949778434268,
      "grad_norm": 0.26286205649375916,
      "learning_rate": 2.7737406084493166e-05,
      "loss": 0.156,
      "step": 6998
    },
    {
      "epoch": 2.5845642540620384,
      "grad_norm": 0.3136504590511322,
      "learning_rate": 2.7712772508929674e-05,
      "loss": 0.1745,
      "step": 6999
    },
    {
      "epoch": 2.58493353028065,
      "grad_norm": 0.23299533128738403,
      "learning_rate": 2.768813893336618e-05,
      "loss": 0.1485,
      "step": 7000
    },
    {
      "epoch": 2.58493353028065,
      "eval_loss": 0.24902725219726562,
      "eval_runtime": 5.8582,
      "eval_samples_per_second": 8.535,
      "eval_steps_per_second": 1.195,
      "step": 7000
    },
    {
      "epoch": 2.5853028064992616,
      "grad_norm": 0.25273215770721436,
      "learning_rate": 2.766350535780269e-05,
      "loss": 0.1335,
      "step": 7001
    },
    {
      "epoch": 2.585672082717873,
      "grad_norm": 0.25386619567871094,
      "learning_rate": 2.7638871782239194e-05,
      "loss": 0.149,
      "step": 7002
    },
    {
      "epoch": 2.5860413589364843,
      "grad_norm": 0.25805094838142395,
      "learning_rate": 2.76142382066757e-05,
      "loss": 0.1695,
      "step": 7003
    },
    {
      "epoch": 2.586410635155096,
      "grad_norm": 0.30421289801597595,
      "learning_rate": 2.758960463111221e-05,
      "loss": 0.1789,
      "step": 7004
    },
    {
      "epoch": 2.5867799113737076,
      "grad_norm": 0.2829640209674835,
      "learning_rate": 2.7564971055548717e-05,
      "loss": 0.1635,
      "step": 7005
    },
    {
      "epoch": 2.587149187592319,
      "grad_norm": 0.31826967000961304,
      "learning_rate": 2.7540337479985222e-05,
      "loss": 0.1885,
      "step": 7006
    },
    {
      "epoch": 2.5875184638109303,
      "grad_norm": 0.2631217837333679,
      "learning_rate": 2.751570390442173e-05,
      "loss": 0.1645,
      "step": 7007
    },
    {
      "epoch": 2.587887740029542,
      "grad_norm": 0.31671977043151855,
      "learning_rate": 2.7491070328858238e-05,
      "loss": 0.1714,
      "step": 7008
    },
    {
      "epoch": 2.5882570162481535,
      "grad_norm": 0.2654682993888855,
      "learning_rate": 2.7466436753294745e-05,
      "loss": 0.1562,
      "step": 7009
    },
    {
      "epoch": 2.588626292466765,
      "grad_norm": 0.2755813002586365,
      "learning_rate": 2.744180317773125e-05,
      "loss": 0.2074,
      "step": 7010
    },
    {
      "epoch": 2.5889955686853767,
      "grad_norm": 0.24998833239078522,
      "learning_rate": 2.7417169602167758e-05,
      "loss": 0.1542,
      "step": 7011
    },
    {
      "epoch": 2.5893648449039883,
      "grad_norm": 0.30420055985450745,
      "learning_rate": 2.7392536026604266e-05,
      "loss": 0.1582,
      "step": 7012
    },
    {
      "epoch": 2.5897341211226,
      "grad_norm": 0.29341813921928406,
      "learning_rate": 2.7367902451040773e-05,
      "loss": 0.1671,
      "step": 7013
    },
    {
      "epoch": 2.590103397341211,
      "grad_norm": 0.3467811644077301,
      "learning_rate": 2.7343268875477278e-05,
      "loss": 0.1561,
      "step": 7014
    },
    {
      "epoch": 2.5904726735598227,
      "grad_norm": 0.29726535081863403,
      "learning_rate": 2.7318635299913786e-05,
      "loss": 0.1703,
      "step": 7015
    },
    {
      "epoch": 2.5908419497784343,
      "grad_norm": 0.2435768097639084,
      "learning_rate": 2.7294001724350294e-05,
      "loss": 0.1588,
      "step": 7016
    },
    {
      "epoch": 2.591211225997046,
      "grad_norm": 0.3036586344242096,
      "learning_rate": 2.72693681487868e-05,
      "loss": 0.1948,
      "step": 7017
    },
    {
      "epoch": 2.591580502215657,
      "grad_norm": 0.29029715061187744,
      "learning_rate": 2.7244734573223306e-05,
      "loss": 0.1699,
      "step": 7018
    },
    {
      "epoch": 2.5919497784342687,
      "grad_norm": 0.3097158968448639,
      "learning_rate": 2.7220100997659814e-05,
      "loss": 0.192,
      "step": 7019
    },
    {
      "epoch": 2.5923190546528803,
      "grad_norm": 0.24611760675907135,
      "learning_rate": 2.7195467422096322e-05,
      "loss": 0.1487,
      "step": 7020
    },
    {
      "epoch": 2.592688330871492,
      "grad_norm": 0.27117979526519775,
      "learning_rate": 2.717083384653283e-05,
      "loss": 0.1662,
      "step": 7021
    },
    {
      "epoch": 2.5930576070901035,
      "grad_norm": 0.30515336990356445,
      "learning_rate": 2.7146200270969334e-05,
      "loss": 0.1606,
      "step": 7022
    },
    {
      "epoch": 2.593426883308715,
      "grad_norm": 0.25747841596603394,
      "learning_rate": 2.7121566695405835e-05,
      "loss": 0.1566,
      "step": 7023
    },
    {
      "epoch": 2.5937961595273267,
      "grad_norm": 0.28772181272506714,
      "learning_rate": 2.7096933119842343e-05,
      "loss": 0.1442,
      "step": 7024
    },
    {
      "epoch": 2.594165435745938,
      "grad_norm": 0.2716567814350128,
      "learning_rate": 2.707229954427885e-05,
      "loss": 0.164,
      "step": 7025
    },
    {
      "epoch": 2.5945347119645494,
      "grad_norm": 0.2506811022758484,
      "learning_rate": 2.704766596871536e-05,
      "loss": 0.1586,
      "step": 7026
    },
    {
      "epoch": 2.594903988183161,
      "grad_norm": 0.2633334994316101,
      "learning_rate": 2.7023032393151863e-05,
      "loss": 0.1517,
      "step": 7027
    },
    {
      "epoch": 2.5952732644017726,
      "grad_norm": 0.27225908637046814,
      "learning_rate": 2.699839881758837e-05,
      "loss": 0.1643,
      "step": 7028
    },
    {
      "epoch": 2.595642540620384,
      "grad_norm": 0.26902899146080017,
      "learning_rate": 2.697376524202488e-05,
      "loss": 0.1443,
      "step": 7029
    },
    {
      "epoch": 2.5960118168389954,
      "grad_norm": 0.2809258699417114,
      "learning_rate": 2.6949131666461387e-05,
      "loss": 0.167,
      "step": 7030
    },
    {
      "epoch": 2.596381093057607,
      "grad_norm": 0.33841001987457275,
      "learning_rate": 2.692449809089789e-05,
      "loss": 0.1795,
      "step": 7031
    },
    {
      "epoch": 2.5967503692762186,
      "grad_norm": 0.25426897406578064,
      "learning_rate": 2.68998645153344e-05,
      "loss": 0.1427,
      "step": 7032
    },
    {
      "epoch": 2.59711964549483,
      "grad_norm": 0.26426997780799866,
      "learning_rate": 2.6875230939770907e-05,
      "loss": 0.1493,
      "step": 7033
    },
    {
      "epoch": 2.597488921713442,
      "grad_norm": 0.32085245847702026,
      "learning_rate": 2.6850597364207415e-05,
      "loss": 0.1827,
      "step": 7034
    },
    {
      "epoch": 2.5978581979320534,
      "grad_norm": 0.24739843606948853,
      "learning_rate": 2.682596378864392e-05,
      "loss": 0.1459,
      "step": 7035
    },
    {
      "epoch": 2.5982274741506646,
      "grad_norm": 0.267869770526886,
      "learning_rate": 2.6801330213080427e-05,
      "loss": 0.1769,
      "step": 7036
    },
    {
      "epoch": 2.598596750369276,
      "grad_norm": 0.310130774974823,
      "learning_rate": 2.6776696637516935e-05,
      "loss": 0.1738,
      "step": 7037
    },
    {
      "epoch": 2.598966026587888,
      "grad_norm": 0.3026999831199646,
      "learning_rate": 2.6752063061953443e-05,
      "loss": 0.1742,
      "step": 7038
    },
    {
      "epoch": 2.5993353028064994,
      "grad_norm": 0.2886936068534851,
      "learning_rate": 2.6727429486389947e-05,
      "loss": 0.1384,
      "step": 7039
    },
    {
      "epoch": 2.5997045790251105,
      "grad_norm": 0.29449960589408875,
      "learning_rate": 2.6702795910826455e-05,
      "loss": 0.1704,
      "step": 7040
    },
    {
      "epoch": 2.600073855243722,
      "grad_norm": 0.31389927864074707,
      "learning_rate": 2.6678162335262963e-05,
      "loss": 0.1912,
      "step": 7041
    },
    {
      "epoch": 2.6004431314623337,
      "grad_norm": 0.3336137533187866,
      "learning_rate": 2.665352875969947e-05,
      "loss": 0.1662,
      "step": 7042
    },
    {
      "epoch": 2.6008124076809453,
      "grad_norm": 0.3057897984981537,
      "learning_rate": 2.6628895184135975e-05,
      "loss": 0.1711,
      "step": 7043
    },
    {
      "epoch": 2.601181683899557,
      "grad_norm": 0.26825442910194397,
      "learning_rate": 2.6604261608572483e-05,
      "loss": 0.1512,
      "step": 7044
    },
    {
      "epoch": 2.6015509601181686,
      "grad_norm": 0.30866551399230957,
      "learning_rate": 2.657962803300899e-05,
      "loss": 0.144,
      "step": 7045
    },
    {
      "epoch": 2.60192023633678,
      "grad_norm": 0.31755682826042175,
      "learning_rate": 2.65549944574455e-05,
      "loss": 0.1931,
      "step": 7046
    },
    {
      "epoch": 2.6022895125553913,
      "grad_norm": 0.3211946487426758,
      "learning_rate": 2.6530360881882004e-05,
      "loss": 0.1807,
      "step": 7047
    },
    {
      "epoch": 2.602658788774003,
      "grad_norm": 0.2673085331916809,
      "learning_rate": 2.650572730631851e-05,
      "loss": 0.168,
      "step": 7048
    },
    {
      "epoch": 2.6030280649926145,
      "grad_norm": 0.2836707830429077,
      "learning_rate": 2.648109373075502e-05,
      "loss": 0.1523,
      "step": 7049
    },
    {
      "epoch": 2.603397341211226,
      "grad_norm": 0.2749764323234558,
      "learning_rate": 2.6456460155191527e-05,
      "loss": 0.1709,
      "step": 7050
    },
    {
      "epoch": 2.603397341211226,
      "eval_loss": 0.24706825613975525,
      "eval_runtime": 5.8584,
      "eval_samples_per_second": 8.535,
      "eval_steps_per_second": 1.195,
      "step": 7050
    },
    {
      "epoch": 2.6037666174298373,
      "grad_norm": 0.3217591345310211,
      "learning_rate": 2.643182657962803e-05,
      "loss": 0.1623,
      "step": 7051
    },
    {
      "epoch": 2.604135893648449,
      "grad_norm": 0.26377376914024353,
      "learning_rate": 2.640719300406454e-05,
      "loss": 0.1687,
      "step": 7052
    },
    {
      "epoch": 2.6045051698670605,
      "grad_norm": 0.2643566429615021,
      "learning_rate": 2.6382559428501047e-05,
      "loss": 0.1493,
      "step": 7053
    },
    {
      "epoch": 2.604874446085672,
      "grad_norm": 0.304091215133667,
      "learning_rate": 2.6357925852937555e-05,
      "loss": 0.1999,
      "step": 7054
    },
    {
      "epoch": 2.6052437223042837,
      "grad_norm": 0.3017551302909851,
      "learning_rate": 2.633329227737406e-05,
      "loss": 0.1737,
      "step": 7055
    },
    {
      "epoch": 2.6056129985228953,
      "grad_norm": 0.28353890776634216,
      "learning_rate": 2.6308658701810568e-05,
      "loss": 0.1655,
      "step": 7056
    },
    {
      "epoch": 2.605982274741507,
      "grad_norm": 0.2601488530635834,
      "learning_rate": 2.6284025126247075e-05,
      "loss": 0.1713,
      "step": 7057
    },
    {
      "epoch": 2.606351550960118,
      "grad_norm": 0.3210563063621521,
      "learning_rate": 2.6259391550683583e-05,
      "loss": 0.1646,
      "step": 7058
    },
    {
      "epoch": 2.6067208271787297,
      "grad_norm": 0.2617020905017853,
      "learning_rate": 2.6234757975120088e-05,
      "loss": 0.182,
      "step": 7059
    },
    {
      "epoch": 2.6070901033973413,
      "grad_norm": 0.26443079113960266,
      "learning_rate": 2.6210124399556596e-05,
      "loss": 0.1558,
      "step": 7060
    },
    {
      "epoch": 2.607459379615953,
      "grad_norm": 0.3176937401294708,
      "learning_rate": 2.6185490823993103e-05,
      "loss": 0.1384,
      "step": 7061
    },
    {
      "epoch": 2.607828655834564,
      "grad_norm": 0.2367580235004425,
      "learning_rate": 2.616085724842961e-05,
      "loss": 0.1678,
      "step": 7062
    },
    {
      "epoch": 2.6081979320531756,
      "grad_norm": 0.21823006868362427,
      "learning_rate": 2.6136223672866116e-05,
      "loss": 0.1319,
      "step": 7063
    },
    {
      "epoch": 2.6085672082717872,
      "grad_norm": 0.2675396502017975,
      "learning_rate": 2.6111590097302624e-05,
      "loss": 0.1648,
      "step": 7064
    },
    {
      "epoch": 2.608936484490399,
      "grad_norm": 0.24858689308166504,
      "learning_rate": 2.608695652173913e-05,
      "loss": 0.1692,
      "step": 7065
    },
    {
      "epoch": 2.6093057607090104,
      "grad_norm": 0.2861214578151703,
      "learning_rate": 2.606232294617564e-05,
      "loss": 0.1682,
      "step": 7066
    },
    {
      "epoch": 2.609675036927622,
      "grad_norm": 0.3085658550262451,
      "learning_rate": 2.6037689370612144e-05,
      "loss": 0.1773,
      "step": 7067
    },
    {
      "epoch": 2.6100443131462336,
      "grad_norm": 0.3473750948905945,
      "learning_rate": 2.6013055795048652e-05,
      "loss": 0.2119,
      "step": 7068
    },
    {
      "epoch": 2.610413589364845,
      "grad_norm": 0.29816731810569763,
      "learning_rate": 2.598842221948516e-05,
      "loss": 0.1709,
      "step": 7069
    },
    {
      "epoch": 2.6107828655834564,
      "grad_norm": 0.2761005163192749,
      "learning_rate": 2.5963788643921667e-05,
      "loss": 0.1499,
      "step": 7070
    },
    {
      "epoch": 2.611152141802068,
      "grad_norm": 0.403288334608078,
      "learning_rate": 2.5939155068358172e-05,
      "loss": 0.2018,
      "step": 7071
    },
    {
      "epoch": 2.6115214180206796,
      "grad_norm": 0.3042411208152771,
      "learning_rate": 2.591452149279468e-05,
      "loss": 0.1853,
      "step": 7072
    },
    {
      "epoch": 2.6118906942392908,
      "grad_norm": 0.2741173505783081,
      "learning_rate": 2.5889887917231188e-05,
      "loss": 0.1767,
      "step": 7073
    },
    {
      "epoch": 2.6122599704579024,
      "grad_norm": 0.28258514404296875,
      "learning_rate": 2.5865254341667696e-05,
      "loss": 0.1477,
      "step": 7074
    },
    {
      "epoch": 2.612629246676514,
      "grad_norm": 0.25004494190216064,
      "learning_rate": 2.58406207661042e-05,
      "loss": 0.1804,
      "step": 7075
    },
    {
      "epoch": 2.6129985228951256,
      "grad_norm": 0.24802225828170776,
      "learning_rate": 2.5815987190540708e-05,
      "loss": 0.1675,
      "step": 7076
    },
    {
      "epoch": 2.613367799113737,
      "grad_norm": 0.28887906670570374,
      "learning_rate": 2.5791353614977216e-05,
      "loss": 0.157,
      "step": 7077
    },
    {
      "epoch": 2.613737075332349,
      "grad_norm": 0.30445319414138794,
      "learning_rate": 2.5766720039413724e-05,
      "loss": 0.1905,
      "step": 7078
    },
    {
      "epoch": 2.6141063515509604,
      "grad_norm": 0.24961666762828827,
      "learning_rate": 2.5742086463850228e-05,
      "loss": 0.1533,
      "step": 7079
    },
    {
      "epoch": 2.6144756277695715,
      "grad_norm": 0.2499169558286667,
      "learning_rate": 2.5717452888286736e-05,
      "loss": 0.157,
      "step": 7080
    },
    {
      "epoch": 2.614844903988183,
      "grad_norm": 0.27537769079208374,
      "learning_rate": 2.5692819312723244e-05,
      "loss": 0.1714,
      "step": 7081
    },
    {
      "epoch": 2.6152141802067947,
      "grad_norm": 0.28446751832962036,
      "learning_rate": 2.566818573715975e-05,
      "loss": 0.1776,
      "step": 7082
    },
    {
      "epoch": 2.6155834564254064,
      "grad_norm": 0.3007389307022095,
      "learning_rate": 2.5643552161596256e-05,
      "loss": 0.1465,
      "step": 7083
    },
    {
      "epoch": 2.6159527326440175,
      "grad_norm": 0.23212134838104248,
      "learning_rate": 2.5618918586032764e-05,
      "loss": 0.142,
      "step": 7084
    },
    {
      "epoch": 2.616322008862629,
      "grad_norm": 0.30454760789871216,
      "learning_rate": 2.5594285010469272e-05,
      "loss": 0.1983,
      "step": 7085
    },
    {
      "epoch": 2.6166912850812407,
      "grad_norm": 0.3018072843551636,
      "learning_rate": 2.556965143490578e-05,
      "loss": 0.1812,
      "step": 7086
    },
    {
      "epoch": 2.6170605612998523,
      "grad_norm": 0.37997758388519287,
      "learning_rate": 2.5545017859342284e-05,
      "loss": 0.1549,
      "step": 7087
    },
    {
      "epoch": 2.617429837518464,
      "grad_norm": 0.2887950837612152,
      "learning_rate": 2.5520384283778792e-05,
      "loss": 0.1721,
      "step": 7088
    },
    {
      "epoch": 2.6177991137370755,
      "grad_norm": 0.2582404613494873,
      "learning_rate": 2.54957507082153e-05,
      "loss": 0.1622,
      "step": 7089
    },
    {
      "epoch": 2.618168389955687,
      "grad_norm": 0.22165557742118835,
      "learning_rate": 2.5471117132651808e-05,
      "loss": 0.1328,
      "step": 7090
    },
    {
      "epoch": 2.6185376661742983,
      "grad_norm": 0.25365129113197327,
      "learning_rate": 2.5446483557088312e-05,
      "loss": 0.1366,
      "step": 7091
    },
    {
      "epoch": 2.61890694239291,
      "grad_norm": 0.2626218795776367,
      "learning_rate": 2.542184998152482e-05,
      "loss": 0.1735,
      "step": 7092
    },
    {
      "epoch": 2.6192762186115215,
      "grad_norm": 0.2618752717971802,
      "learning_rate": 2.5397216405961328e-05,
      "loss": 0.1669,
      "step": 7093
    },
    {
      "epoch": 2.619645494830133,
      "grad_norm": 0.30638962984085083,
      "learning_rate": 2.5372582830397832e-05,
      "loss": 0.1928,
      "step": 7094
    },
    {
      "epoch": 2.6200147710487443,
      "grad_norm": 0.2914068400859833,
      "learning_rate": 2.534794925483434e-05,
      "loss": 0.158,
      "step": 7095
    },
    {
      "epoch": 2.620384047267356,
      "grad_norm": 0.33337917923927307,
      "learning_rate": 2.5323315679270848e-05,
      "loss": 0.1945,
      "step": 7096
    },
    {
      "epoch": 2.6207533234859675,
      "grad_norm": 0.2358565479516983,
      "learning_rate": 2.5298682103707356e-05,
      "loss": 0.1344,
      "step": 7097
    },
    {
      "epoch": 2.621122599704579,
      "grad_norm": 0.28325948119163513,
      "learning_rate": 2.527404852814386e-05,
      "loss": 0.1495,
      "step": 7098
    },
    {
      "epoch": 2.6214918759231907,
      "grad_norm": 0.26230067014694214,
      "learning_rate": 2.524941495258037e-05,
      "loss": 0.172,
      "step": 7099
    },
    {
      "epoch": 2.6218611521418023,
      "grad_norm": 0.26534080505371094,
      "learning_rate": 2.5224781377016876e-05,
      "loss": 0.1646,
      "step": 7100
    },
    {
      "epoch": 2.6218611521418023,
      "eval_loss": 0.24772045016288757,
      "eval_runtime": 5.8546,
      "eval_samples_per_second": 8.54,
      "eval_steps_per_second": 1.196,
      "step": 7100
    },
    {
      "epoch": 2.6222304283604134,
      "grad_norm": 0.24729250371456146,
      "learning_rate": 2.5200147801453384e-05,
      "loss": 0.1593,
      "step": 7101
    },
    {
      "epoch": 2.622599704579025,
      "grad_norm": 0.2703579366207123,
      "learning_rate": 2.517551422588989e-05,
      "loss": 0.1625,
      "step": 7102
    },
    {
      "epoch": 2.6229689807976366,
      "grad_norm": 0.2587558627128601,
      "learning_rate": 2.5150880650326396e-05,
      "loss": 0.1571,
      "step": 7103
    },
    {
      "epoch": 2.6233382570162482,
      "grad_norm": 0.3044690489768982,
      "learning_rate": 2.5126247074762904e-05,
      "loss": 0.1632,
      "step": 7104
    },
    {
      "epoch": 2.62370753323486,
      "grad_norm": 0.2837722897529602,
      "learning_rate": 2.5101613499199412e-05,
      "loss": 0.1731,
      "step": 7105
    },
    {
      "epoch": 2.624076809453471,
      "grad_norm": 0.2909303307533264,
      "learning_rate": 2.5076979923635917e-05,
      "loss": 0.1724,
      "step": 7106
    },
    {
      "epoch": 2.6244460856720826,
      "grad_norm": 0.2665870189666748,
      "learning_rate": 2.5052346348072425e-05,
      "loss": 0.141,
      "step": 7107
    },
    {
      "epoch": 2.624815361890694,
      "grad_norm": 0.23444689810276031,
      "learning_rate": 2.5027712772508932e-05,
      "loss": 0.1714,
      "step": 7108
    },
    {
      "epoch": 2.625184638109306,
      "grad_norm": 0.22857993841171265,
      "learning_rate": 2.500307919694544e-05,
      "loss": 0.1602,
      "step": 7109
    },
    {
      "epoch": 2.6255539143279174,
      "grad_norm": 0.25740647315979004,
      "learning_rate": 2.4978445621381945e-05,
      "loss": 0.1445,
      "step": 7110
    },
    {
      "epoch": 2.625923190546529,
      "grad_norm": 0.31387707591056824,
      "learning_rate": 2.495381204581845e-05,
      "loss": 0.1786,
      "step": 7111
    },
    {
      "epoch": 2.62629246676514,
      "grad_norm": 0.2700439989566803,
      "learning_rate": 2.4929178470254957e-05,
      "loss": 0.1473,
      "step": 7112
    },
    {
      "epoch": 2.6266617429837518,
      "grad_norm": 0.2955355644226074,
      "learning_rate": 2.4904544894691465e-05,
      "loss": 0.19,
      "step": 7113
    },
    {
      "epoch": 2.6270310192023634,
      "grad_norm": 0.254533976316452,
      "learning_rate": 2.4879911319127973e-05,
      "loss": 0.1704,
      "step": 7114
    },
    {
      "epoch": 2.627400295420975,
      "grad_norm": 0.34027546644210815,
      "learning_rate": 2.4855277743564477e-05,
      "loss": 0.2173,
      "step": 7115
    },
    {
      "epoch": 2.6277695716395866,
      "grad_norm": 0.30294856429100037,
      "learning_rate": 2.4830644168000985e-05,
      "loss": 0.1746,
      "step": 7116
    },
    {
      "epoch": 2.6281388478581977,
      "grad_norm": 0.24490997195243835,
      "learning_rate": 2.4806010592437493e-05,
      "loss": 0.1534,
      "step": 7117
    },
    {
      "epoch": 2.6285081240768093,
      "grad_norm": 0.29865172505378723,
      "learning_rate": 2.4781377016874e-05,
      "loss": 0.1525,
      "step": 7118
    },
    {
      "epoch": 2.628877400295421,
      "grad_norm": 0.40556472539901733,
      "learning_rate": 2.4756743441310505e-05,
      "loss": 0.1557,
      "step": 7119
    },
    {
      "epoch": 2.6292466765140325,
      "grad_norm": 0.23522064089775085,
      "learning_rate": 2.4732109865747013e-05,
      "loss": 0.1824,
      "step": 7120
    },
    {
      "epoch": 2.629615952732644,
      "grad_norm": 0.26868516206741333,
      "learning_rate": 2.470747629018352e-05,
      "loss": 0.1634,
      "step": 7121
    },
    {
      "epoch": 2.6299852289512557,
      "grad_norm": 0.3045238256454468,
      "learning_rate": 2.468284271462003e-05,
      "loss": 0.1677,
      "step": 7122
    },
    {
      "epoch": 2.630354505169867,
      "grad_norm": 0.3067895770072937,
      "learning_rate": 2.4658209139056533e-05,
      "loss": 0.148,
      "step": 7123
    },
    {
      "epoch": 2.6307237813884785,
      "grad_norm": 0.24589458107948303,
      "learning_rate": 2.463357556349304e-05,
      "loss": 0.148,
      "step": 7124
    },
    {
      "epoch": 2.63109305760709,
      "grad_norm": 0.2567375898361206,
      "learning_rate": 2.460894198792955e-05,
      "loss": 0.1354,
      "step": 7125
    },
    {
      "epoch": 2.6314623338257017,
      "grad_norm": 0.31593817472457886,
      "learning_rate": 2.4584308412366057e-05,
      "loss": 0.1535,
      "step": 7126
    },
    {
      "epoch": 2.631831610044313,
      "grad_norm": 0.29164353013038635,
      "learning_rate": 2.455967483680256e-05,
      "loss": 0.1975,
      "step": 7127
    },
    {
      "epoch": 2.6322008862629245,
      "grad_norm": 0.28976204991340637,
      "learning_rate": 2.453504126123907e-05,
      "loss": 0.155,
      "step": 7128
    },
    {
      "epoch": 2.632570162481536,
      "grad_norm": 0.3702680170536041,
      "learning_rate": 2.4510407685675577e-05,
      "loss": 0.1778,
      "step": 7129
    },
    {
      "epoch": 2.6329394387001477,
      "grad_norm": 0.31121230125427246,
      "learning_rate": 2.4485774110112085e-05,
      "loss": 0.1924,
      "step": 7130
    },
    {
      "epoch": 2.6333087149187593,
      "grad_norm": 0.25303661823272705,
      "learning_rate": 2.446114053454859e-05,
      "loss": 0.1539,
      "step": 7131
    },
    {
      "epoch": 2.633677991137371,
      "grad_norm": 0.36884987354278564,
      "learning_rate": 2.4436506958985097e-05,
      "loss": 0.1691,
      "step": 7132
    },
    {
      "epoch": 2.6340472673559825,
      "grad_norm": 0.2501956522464752,
      "learning_rate": 2.4411873383421605e-05,
      "loss": 0.164,
      "step": 7133
    },
    {
      "epoch": 2.6344165435745936,
      "grad_norm": 0.27940261363983154,
      "learning_rate": 2.4387239807858113e-05,
      "loss": 0.1801,
      "step": 7134
    },
    {
      "epoch": 2.6347858197932053,
      "grad_norm": 0.25051239132881165,
      "learning_rate": 2.4362606232294618e-05,
      "loss": 0.1685,
      "step": 7135
    },
    {
      "epoch": 2.635155096011817,
      "grad_norm": 0.2795883119106293,
      "learning_rate": 2.4337972656731125e-05,
      "loss": 0.1588,
      "step": 7136
    },
    {
      "epoch": 2.6355243722304285,
      "grad_norm": 0.2900752127170563,
      "learning_rate": 2.4313339081167633e-05,
      "loss": 0.1703,
      "step": 7137
    },
    {
      "epoch": 2.6358936484490396,
      "grad_norm": 0.25440940260887146,
      "learning_rate": 2.428870550560414e-05,
      "loss": 0.1677,
      "step": 7138
    },
    {
      "epoch": 2.636262924667651,
      "grad_norm": 0.27722442150115967,
      "learning_rate": 2.4264071930040646e-05,
      "loss": 0.169,
      "step": 7139
    },
    {
      "epoch": 2.636632200886263,
      "grad_norm": 0.29610511660575867,
      "learning_rate": 2.4239438354477154e-05,
      "loss": 0.1592,
      "step": 7140
    },
    {
      "epoch": 2.6370014771048744,
      "grad_norm": 0.24841229617595673,
      "learning_rate": 2.421480477891366e-05,
      "loss": 0.1768,
      "step": 7141
    },
    {
      "epoch": 2.637370753323486,
      "grad_norm": 0.2565562129020691,
      "learning_rate": 2.419017120335017e-05,
      "loss": 0.176,
      "step": 7142
    },
    {
      "epoch": 2.6377400295420976,
      "grad_norm": 0.31593239307403564,
      "learning_rate": 2.4165537627786674e-05,
      "loss": 0.1982,
      "step": 7143
    },
    {
      "epoch": 2.6381093057607092,
      "grad_norm": 0.2690623998641968,
      "learning_rate": 2.414090405222318e-05,
      "loss": 0.1671,
      "step": 7144
    },
    {
      "epoch": 2.6384785819793204,
      "grad_norm": 0.267892450094223,
      "learning_rate": 2.411627047665969e-05,
      "loss": 0.1896,
      "step": 7145
    },
    {
      "epoch": 2.638847858197932,
      "grad_norm": 0.30485016107559204,
      "learning_rate": 2.4091636901096197e-05,
      "loss": 0.1639,
      "step": 7146
    },
    {
      "epoch": 2.6392171344165436,
      "grad_norm": 0.26398298144340515,
      "learning_rate": 2.4067003325532702e-05,
      "loss": 0.1551,
      "step": 7147
    },
    {
      "epoch": 2.639586410635155,
      "grad_norm": 0.26881563663482666,
      "learning_rate": 2.404236974996921e-05,
      "loss": 0.1686,
      "step": 7148
    },
    {
      "epoch": 2.6399556868537664,
      "grad_norm": 0.22321288287639618,
      "learning_rate": 2.4017736174405718e-05,
      "loss": 0.1431,
      "step": 7149
    },
    {
      "epoch": 2.640324963072378,
      "grad_norm": 0.25838327407836914,
      "learning_rate": 2.3993102598842222e-05,
      "loss": 0.1816,
      "step": 7150
    },
    {
      "epoch": 2.640324963072378,
      "eval_loss": 0.2477797269821167,
      "eval_runtime": 5.8593,
      "eval_samples_per_second": 8.533,
      "eval_steps_per_second": 1.195,
      "step": 7150
    },
    {
      "epoch": 2.6406942392909896,
      "grad_norm": 0.2646441161632538,
      "learning_rate": 2.396846902327873e-05,
      "loss": 0.1665,
      "step": 7151
    },
    {
      "epoch": 2.641063515509601,
      "grad_norm": 0.2569630742073059,
      "learning_rate": 2.3943835447715238e-05,
      "loss": 0.184,
      "step": 7152
    },
    {
      "epoch": 2.6414327917282128,
      "grad_norm": 0.27376681566238403,
      "learning_rate": 2.3919201872151746e-05,
      "loss": 0.1328,
      "step": 7153
    },
    {
      "epoch": 2.6418020679468244,
      "grad_norm": 0.2927257716655731,
      "learning_rate": 2.389456829658825e-05,
      "loss": 0.1587,
      "step": 7154
    },
    {
      "epoch": 2.642171344165436,
      "grad_norm": 0.25532981753349304,
      "learning_rate": 2.3869934721024758e-05,
      "loss": 0.1672,
      "step": 7155
    },
    {
      "epoch": 2.642540620384047,
      "grad_norm": 0.2832915484905243,
      "learning_rate": 2.3845301145461266e-05,
      "loss": 0.175,
      "step": 7156
    },
    {
      "epoch": 2.6429098966026587,
      "grad_norm": 0.23710231482982635,
      "learning_rate": 2.3820667569897774e-05,
      "loss": 0.1667,
      "step": 7157
    },
    {
      "epoch": 2.6432791728212703,
      "grad_norm": 0.25296664237976074,
      "learning_rate": 2.3796033994334278e-05,
      "loss": 0.1429,
      "step": 7158
    },
    {
      "epoch": 2.643648449039882,
      "grad_norm": 0.3041204512119293,
      "learning_rate": 2.3771400418770786e-05,
      "loss": 0.1895,
      "step": 7159
    },
    {
      "epoch": 2.644017725258493,
      "grad_norm": 0.2530307471752167,
      "learning_rate": 2.3746766843207294e-05,
      "loss": 0.1636,
      "step": 7160
    },
    {
      "epoch": 2.6443870014771047,
      "grad_norm": 0.38362157344818115,
      "learning_rate": 2.3722133267643802e-05,
      "loss": 0.2049,
      "step": 7161
    },
    {
      "epoch": 2.6447562776957163,
      "grad_norm": 0.3008923828601837,
      "learning_rate": 2.3697499692080306e-05,
      "loss": 0.1864,
      "step": 7162
    },
    {
      "epoch": 2.645125553914328,
      "grad_norm": 0.2282281070947647,
      "learning_rate": 2.3672866116516814e-05,
      "loss": 0.1311,
      "step": 7163
    },
    {
      "epoch": 2.6454948301329395,
      "grad_norm": 0.26647505164146423,
      "learning_rate": 2.3648232540953322e-05,
      "loss": 0.1801,
      "step": 7164
    },
    {
      "epoch": 2.645864106351551,
      "grad_norm": 0.3231697082519531,
      "learning_rate": 2.362359896538983e-05,
      "loss": 0.1886,
      "step": 7165
    },
    {
      "epoch": 2.6462333825701627,
      "grad_norm": 0.273769348859787,
      "learning_rate": 2.3598965389826334e-05,
      "loss": 0.1553,
      "step": 7166
    },
    {
      "epoch": 2.646602658788774,
      "grad_norm": 0.27067431807518005,
      "learning_rate": 2.3574331814262842e-05,
      "loss": 0.171,
      "step": 7167
    },
    {
      "epoch": 2.6469719350073855,
      "grad_norm": 0.40708816051483154,
      "learning_rate": 2.3549698238699347e-05,
      "loss": 0.1617,
      "step": 7168
    },
    {
      "epoch": 2.647341211225997,
      "grad_norm": 0.2595626413822174,
      "learning_rate": 2.3525064663135854e-05,
      "loss": 0.157,
      "step": 7169
    },
    {
      "epoch": 2.6477104874446087,
      "grad_norm": 0.2784646451473236,
      "learning_rate": 2.3500431087572362e-05,
      "loss": 0.1714,
      "step": 7170
    },
    {
      "epoch": 2.64807976366322,
      "grad_norm": 0.25493499636650085,
      "learning_rate": 2.3475797512008867e-05,
      "loss": 0.1634,
      "step": 7171
    },
    {
      "epoch": 2.6484490398818314,
      "grad_norm": 0.39086082577705383,
      "learning_rate": 2.3451163936445375e-05,
      "loss": 0.1879,
      "step": 7172
    },
    {
      "epoch": 2.648818316100443,
      "grad_norm": 0.24589529633522034,
      "learning_rate": 2.3426530360881883e-05,
      "loss": 0.1736,
      "step": 7173
    },
    {
      "epoch": 2.6491875923190547,
      "grad_norm": 0.23601077497005463,
      "learning_rate": 2.340189678531839e-05,
      "loss": 0.1308,
      "step": 7174
    },
    {
      "epoch": 2.6495568685376663,
      "grad_norm": 0.27976614236831665,
      "learning_rate": 2.3377263209754895e-05,
      "loss": 0.1838,
      "step": 7175
    },
    {
      "epoch": 2.649926144756278,
      "grad_norm": 0.27918046712875366,
      "learning_rate": 2.3352629634191403e-05,
      "loss": 0.1508,
      "step": 7176
    },
    {
      "epoch": 2.6502954209748895,
      "grad_norm": 0.23547807335853577,
      "learning_rate": 2.332799605862791e-05,
      "loss": 0.1569,
      "step": 7177
    },
    {
      "epoch": 2.6506646971935006,
      "grad_norm": 0.22876212000846863,
      "learning_rate": 2.330336248306442e-05,
      "loss": 0.1628,
      "step": 7178
    },
    {
      "epoch": 2.651033973412112,
      "grad_norm": 0.2528270483016968,
      "learning_rate": 2.3278728907500923e-05,
      "loss": 0.1615,
      "step": 7179
    },
    {
      "epoch": 2.651403249630724,
      "grad_norm": 0.3297088146209717,
      "learning_rate": 2.325409533193743e-05,
      "loss": 0.1569,
      "step": 7180
    },
    {
      "epoch": 2.6517725258493354,
      "grad_norm": 0.25134512782096863,
      "learning_rate": 2.322946175637394e-05,
      "loss": 0.1637,
      "step": 7181
    },
    {
      "epoch": 2.6521418020679466,
      "grad_norm": 0.2733760476112366,
      "learning_rate": 2.3204828180810447e-05,
      "loss": 0.1667,
      "step": 7182
    },
    {
      "epoch": 2.652511078286558,
      "grad_norm": 0.27315571904182434,
      "learning_rate": 2.318019460524695e-05,
      "loss": 0.1457,
      "step": 7183
    },
    {
      "epoch": 2.65288035450517,
      "grad_norm": 0.29086950421333313,
      "learning_rate": 2.315556102968346e-05,
      "loss": 0.1365,
      "step": 7184
    },
    {
      "epoch": 2.6532496307237814,
      "grad_norm": 0.304569274187088,
      "learning_rate": 2.3130927454119967e-05,
      "loss": 0.1812,
      "step": 7185
    },
    {
      "epoch": 2.653618906942393,
      "grad_norm": 0.2263062745332718,
      "learning_rate": 2.3106293878556475e-05,
      "loss": 0.1373,
      "step": 7186
    },
    {
      "epoch": 2.6539881831610046,
      "grad_norm": 0.279864639043808,
      "learning_rate": 2.308166030299298e-05,
      "loss": 0.1707,
      "step": 7187
    },
    {
      "epoch": 2.654357459379616,
      "grad_norm": 0.2562095820903778,
      "learning_rate": 2.3057026727429487e-05,
      "loss": 0.1673,
      "step": 7188
    },
    {
      "epoch": 2.6547267355982274,
      "grad_norm": 0.323538601398468,
      "learning_rate": 2.3032393151865995e-05,
      "loss": 0.2028,
      "step": 7189
    },
    {
      "epoch": 2.655096011816839,
      "grad_norm": 0.3011215925216675,
      "learning_rate": 2.3007759576302503e-05,
      "loss": 0.1953,
      "step": 7190
    },
    {
      "epoch": 2.6554652880354506,
      "grad_norm": 0.23197126388549805,
      "learning_rate": 2.2983126000739007e-05,
      "loss": 0.1471,
      "step": 7191
    },
    {
      "epoch": 2.655834564254062,
      "grad_norm": 0.27711254358291626,
      "learning_rate": 2.2958492425175515e-05,
      "loss": 0.1467,
      "step": 7192
    },
    {
      "epoch": 2.6562038404726733,
      "grad_norm": 0.3001069128513336,
      "learning_rate": 2.2933858849612023e-05,
      "loss": 0.1821,
      "step": 7193
    },
    {
      "epoch": 2.656573116691285,
      "grad_norm": 0.3010239601135254,
      "learning_rate": 2.290922527404853e-05,
      "loss": 0.1761,
      "step": 7194
    },
    {
      "epoch": 2.6569423929098965,
      "grad_norm": 0.25081583857536316,
      "learning_rate": 2.2884591698485035e-05,
      "loss": 0.1618,
      "step": 7195
    },
    {
      "epoch": 2.657311669128508,
      "grad_norm": 0.2852584719657898,
      "learning_rate": 2.2859958122921543e-05,
      "loss": 0.1478,
      "step": 7196
    },
    {
      "epoch": 2.6576809453471197,
      "grad_norm": 0.25639086961746216,
      "learning_rate": 2.283532454735805e-05,
      "loss": 0.1531,
      "step": 7197
    },
    {
      "epoch": 2.6580502215657313,
      "grad_norm": 0.26445454359054565,
      "learning_rate": 2.281069097179456e-05,
      "loss": 0.1583,
      "step": 7198
    },
    {
      "epoch": 2.658419497784343,
      "grad_norm": 0.3616959750652313,
      "learning_rate": 2.2786057396231063e-05,
      "loss": 0.1756,
      "step": 7199
    },
    {
      "epoch": 2.658788774002954,
      "grad_norm": 0.2844349443912506,
      "learning_rate": 2.276142382066757e-05,
      "loss": 0.149,
      "step": 7200
    },
    {
      "epoch": 2.658788774002954,
      "eval_loss": 0.24778319895267487,
      "eval_runtime": 5.8637,
      "eval_samples_per_second": 8.527,
      "eval_steps_per_second": 1.194,
      "step": 7200
    },
    {
      "epoch": 2.6591580502215657,
      "grad_norm": 0.28954705595970154,
      "learning_rate": 2.273679024510408e-05,
      "loss": 0.1493,
      "step": 7201
    },
    {
      "epoch": 2.6595273264401773,
      "grad_norm": 0.2820834517478943,
      "learning_rate": 2.2712156669540587e-05,
      "loss": 0.1543,
      "step": 7202
    },
    {
      "epoch": 2.659896602658789,
      "grad_norm": 0.2745341360569,
      "learning_rate": 2.268752309397709e-05,
      "loss": 0.1665,
      "step": 7203
    },
    {
      "epoch": 2.6602658788774,
      "grad_norm": 0.3849048912525177,
      "learning_rate": 2.26628895184136e-05,
      "loss": 0.1738,
      "step": 7204
    },
    {
      "epoch": 2.6606351550960117,
      "grad_norm": 0.2253219187259674,
      "learning_rate": 2.2638255942850107e-05,
      "loss": 0.1553,
      "step": 7205
    },
    {
      "epoch": 2.6610044313146233,
      "grad_norm": 0.2824893295764923,
      "learning_rate": 2.261362236728661e-05,
      "loss": 0.1569,
      "step": 7206
    },
    {
      "epoch": 2.661373707533235,
      "grad_norm": 0.2944081723690033,
      "learning_rate": 2.258898879172312e-05,
      "loss": 0.172,
      "step": 7207
    },
    {
      "epoch": 2.6617429837518465,
      "grad_norm": 0.26585111021995544,
      "learning_rate": 2.2564355216159627e-05,
      "loss": 0.1612,
      "step": 7208
    },
    {
      "epoch": 2.662112259970458,
      "grad_norm": 0.27536478638648987,
      "learning_rate": 2.2539721640596135e-05,
      "loss": 0.1691,
      "step": 7209
    },
    {
      "epoch": 2.6624815361890697,
      "grad_norm": 0.27154725790023804,
      "learning_rate": 2.251508806503264e-05,
      "loss": 0.1582,
      "step": 7210
    },
    {
      "epoch": 2.662850812407681,
      "grad_norm": 0.2940751612186432,
      "learning_rate": 2.2490454489469147e-05,
      "loss": 0.1803,
      "step": 7211
    },
    {
      "epoch": 2.6632200886262924,
      "grad_norm": 0.2881784737110138,
      "learning_rate": 2.2465820913905655e-05,
      "loss": 0.1549,
      "step": 7212
    },
    {
      "epoch": 2.663589364844904,
      "grad_norm": 0.23567473888397217,
      "learning_rate": 2.2441187338342163e-05,
      "loss": 0.1376,
      "step": 7213
    },
    {
      "epoch": 2.6639586410635157,
      "grad_norm": 0.277631014585495,
      "learning_rate": 2.2416553762778668e-05,
      "loss": 0.1794,
      "step": 7214
    },
    {
      "epoch": 2.664327917282127,
      "grad_norm": 0.3504440188407898,
      "learning_rate": 2.2391920187215176e-05,
      "loss": 0.1703,
      "step": 7215
    },
    {
      "epoch": 2.6646971935007384,
      "grad_norm": 0.2590697705745697,
      "learning_rate": 2.2367286611651683e-05,
      "loss": 0.1416,
      "step": 7216
    },
    {
      "epoch": 2.66506646971935,
      "grad_norm": 0.30892354249954224,
      "learning_rate": 2.234265303608819e-05,
      "loss": 0.1894,
      "step": 7217
    },
    {
      "epoch": 2.6654357459379616,
      "grad_norm": 0.3599696457386017,
      "learning_rate": 2.2318019460524696e-05,
      "loss": 0.1867,
      "step": 7218
    },
    {
      "epoch": 2.6658050221565732,
      "grad_norm": 0.3020095229148865,
      "learning_rate": 2.2293385884961204e-05,
      "loss": 0.1898,
      "step": 7219
    },
    {
      "epoch": 2.666174298375185,
      "grad_norm": 0.303668737411499,
      "learning_rate": 2.226875230939771e-05,
      "loss": 0.1608,
      "step": 7220
    },
    {
      "epoch": 2.6665435745937964,
      "grad_norm": 0.29848936200141907,
      "learning_rate": 2.224411873383422e-05,
      "loss": 0.167,
      "step": 7221
    },
    {
      "epoch": 2.6669128508124076,
      "grad_norm": 0.25844043493270874,
      "learning_rate": 2.2219485158270724e-05,
      "loss": 0.1657,
      "step": 7222
    },
    {
      "epoch": 2.667282127031019,
      "grad_norm": 0.2853298485279083,
      "learning_rate": 2.219485158270723e-05,
      "loss": 0.1865,
      "step": 7223
    },
    {
      "epoch": 2.667651403249631,
      "grad_norm": 0.2674068212509155,
      "learning_rate": 2.217021800714374e-05,
      "loss": 0.1593,
      "step": 7224
    },
    {
      "epoch": 2.6680206794682424,
      "grad_norm": 0.33980315923690796,
      "learning_rate": 2.2145584431580247e-05,
      "loss": 0.1794,
      "step": 7225
    },
    {
      "epoch": 2.6683899556868536,
      "grad_norm": 0.39625829458236694,
      "learning_rate": 2.2120950856016752e-05,
      "loss": 0.1848,
      "step": 7226
    },
    {
      "epoch": 2.668759231905465,
      "grad_norm": 0.2665499448776245,
      "learning_rate": 2.2096317280453256e-05,
      "loss": 0.1799,
      "step": 7227
    },
    {
      "epoch": 2.6691285081240768,
      "grad_norm": 0.3573761582374573,
      "learning_rate": 2.2071683704889764e-05,
      "loss": 0.1758,
      "step": 7228
    },
    {
      "epoch": 2.6694977843426884,
      "grad_norm": 0.2860005497932434,
      "learning_rate": 2.2047050129326272e-05,
      "loss": 0.1766,
      "step": 7229
    },
    {
      "epoch": 2.6698670605613,
      "grad_norm": 0.26474326848983765,
      "learning_rate": 2.202241655376278e-05,
      "loss": 0.151,
      "step": 7230
    },
    {
      "epoch": 2.6702363367799116,
      "grad_norm": 0.28622764348983765,
      "learning_rate": 2.1997782978199284e-05,
      "loss": 0.1692,
      "step": 7231
    },
    {
      "epoch": 2.670605612998523,
      "grad_norm": 0.2745191752910614,
      "learning_rate": 2.1973149402635792e-05,
      "loss": 0.2144,
      "step": 7232
    },
    {
      "epoch": 2.6709748892171343,
      "grad_norm": 0.23318861424922943,
      "learning_rate": 2.19485158270723e-05,
      "loss": 0.1593,
      "step": 7233
    },
    {
      "epoch": 2.671344165435746,
      "grad_norm": 0.29776158928871155,
      "learning_rate": 2.1923882251508808e-05,
      "loss": 0.1944,
      "step": 7234
    },
    {
      "epoch": 2.6717134416543575,
      "grad_norm": 0.2800130546092987,
      "learning_rate": 2.1899248675945312e-05,
      "loss": 0.1723,
      "step": 7235
    },
    {
      "epoch": 2.672082717872969,
      "grad_norm": 0.24548450112342834,
      "learning_rate": 2.187461510038182e-05,
      "loss": 0.1523,
      "step": 7236
    },
    {
      "epoch": 2.6724519940915803,
      "grad_norm": 0.29334062337875366,
      "learning_rate": 2.1849981524818328e-05,
      "loss": 0.1614,
      "step": 7237
    },
    {
      "epoch": 2.672821270310192,
      "grad_norm": 0.2908742427825928,
      "learning_rate": 2.1825347949254836e-05,
      "loss": 0.1684,
      "step": 7238
    },
    {
      "epoch": 2.6731905465288035,
      "grad_norm": 0.3410395085811615,
      "learning_rate": 2.180071437369134e-05,
      "loss": 0.2048,
      "step": 7239
    },
    {
      "epoch": 2.673559822747415,
      "grad_norm": 0.30411624908447266,
      "learning_rate": 2.177608079812785e-05,
      "loss": 0.2078,
      "step": 7240
    },
    {
      "epoch": 2.6739290989660267,
      "grad_norm": 0.28259730339050293,
      "learning_rate": 2.1751447222564356e-05,
      "loss": 0.1524,
      "step": 7241
    },
    {
      "epoch": 2.6742983751846383,
      "grad_norm": 0.2348640412092209,
      "learning_rate": 2.1726813647000864e-05,
      "loss": 0.1495,
      "step": 7242
    },
    {
      "epoch": 2.6746676514032495,
      "grad_norm": 0.22753603756427765,
      "learning_rate": 2.170218007143737e-05,
      "loss": 0.1608,
      "step": 7243
    },
    {
      "epoch": 2.675036927621861,
      "grad_norm": 0.2376619130373001,
      "learning_rate": 2.1677546495873876e-05,
      "loss": 0.1511,
      "step": 7244
    },
    {
      "epoch": 2.6754062038404727,
      "grad_norm": 0.2439901977777481,
      "learning_rate": 2.1652912920310384e-05,
      "loss": 0.1396,
      "step": 7245
    },
    {
      "epoch": 2.6757754800590843,
      "grad_norm": 0.4259640574455261,
      "learning_rate": 2.1628279344746892e-05,
      "loss": 0.1726,
      "step": 7246
    },
    {
      "epoch": 2.676144756277696,
      "grad_norm": 0.3889214098453522,
      "learning_rate": 2.1603645769183397e-05,
      "loss": 0.1962,
      "step": 7247
    },
    {
      "epoch": 2.676514032496307,
      "grad_norm": 0.30397069454193115,
      "learning_rate": 2.1579012193619905e-05,
      "loss": 0.173,
      "step": 7248
    },
    {
      "epoch": 2.6768833087149186,
      "grad_norm": 0.28692227602005005,
      "learning_rate": 2.1554378618056412e-05,
      "loss": 0.1692,
      "step": 7249
    },
    {
      "epoch": 2.6772525849335302,
      "grad_norm": 0.28447920083999634,
      "learning_rate": 2.152974504249292e-05,
      "loss": 0.167,
      "step": 7250
    },
    {
      "epoch": 2.6772525849335302,
      "eval_loss": 0.24530024826526642,
      "eval_runtime": 5.8478,
      "eval_samples_per_second": 8.55,
      "eval_steps_per_second": 1.197,
      "step": 7250
    },
    {
      "epoch": 2.677621861152142,
      "grad_norm": 0.2785339951515198,
      "learning_rate": 2.1505111466929425e-05,
      "loss": 0.2095,
      "step": 7251
    },
    {
      "epoch": 2.6779911373707534,
      "grad_norm": 0.22790326178073883,
      "learning_rate": 2.1480477891365933e-05,
      "loss": 0.1717,
      "step": 7252
    },
    {
      "epoch": 2.678360413589365,
      "grad_norm": 0.29789429903030396,
      "learning_rate": 2.145584431580244e-05,
      "loss": 0.1673,
      "step": 7253
    },
    {
      "epoch": 2.678729689807976,
      "grad_norm": 0.24409626424312592,
      "learning_rate": 2.143121074023895e-05,
      "loss": 0.1377,
      "step": 7254
    },
    {
      "epoch": 2.679098966026588,
      "grad_norm": 0.2257269024848938,
      "learning_rate": 2.1406577164675453e-05,
      "loss": 0.154,
      "step": 7255
    },
    {
      "epoch": 2.6794682422451994,
      "grad_norm": 0.3350788950920105,
      "learning_rate": 2.138194358911196e-05,
      "loss": 0.1946,
      "step": 7256
    },
    {
      "epoch": 2.679837518463811,
      "grad_norm": 0.25675123929977417,
      "learning_rate": 2.135731001354847e-05,
      "loss": 0.1748,
      "step": 7257
    },
    {
      "epoch": 2.680206794682422,
      "grad_norm": 0.3013542890548706,
      "learning_rate": 2.1332676437984976e-05,
      "loss": 0.1649,
      "step": 7258
    },
    {
      "epoch": 2.680576070901034,
      "grad_norm": 0.27731192111968994,
      "learning_rate": 2.130804286242148e-05,
      "loss": 0.1722,
      "step": 7259
    },
    {
      "epoch": 2.6809453471196454,
      "grad_norm": 0.24194374680519104,
      "learning_rate": 2.128340928685799e-05,
      "loss": 0.1598,
      "step": 7260
    },
    {
      "epoch": 2.681314623338257,
      "grad_norm": 0.32079851627349854,
      "learning_rate": 2.1258775711294497e-05,
      "loss": 0.1722,
      "step": 7261
    },
    {
      "epoch": 2.6816838995568686,
      "grad_norm": 0.24001316726207733,
      "learning_rate": 2.1234142135731004e-05,
      "loss": 0.1541,
      "step": 7262
    },
    {
      "epoch": 2.68205317577548,
      "grad_norm": 0.36363720893859863,
      "learning_rate": 2.120950856016751e-05,
      "loss": 0.1742,
      "step": 7263
    },
    {
      "epoch": 2.682422451994092,
      "grad_norm": 0.33500853180885315,
      "learning_rate": 2.1184874984604017e-05,
      "loss": 0.2092,
      "step": 7264
    },
    {
      "epoch": 2.682791728212703,
      "grad_norm": 0.34495463967323303,
      "learning_rate": 2.1160241409040525e-05,
      "loss": 0.204,
      "step": 7265
    },
    {
      "epoch": 2.6831610044313146,
      "grad_norm": 0.24614976346492767,
      "learning_rate": 2.113560783347703e-05,
      "loss": 0.1542,
      "step": 7266
    },
    {
      "epoch": 2.683530280649926,
      "grad_norm": 0.26398226618766785,
      "learning_rate": 2.1110974257913537e-05,
      "loss": 0.1702,
      "step": 7267
    },
    {
      "epoch": 2.6838995568685378,
      "grad_norm": 0.3053957223892212,
      "learning_rate": 2.1086340682350045e-05,
      "loss": 0.1601,
      "step": 7268
    },
    {
      "epoch": 2.684268833087149,
      "grad_norm": 0.30904287099838257,
      "learning_rate": 2.1061707106786553e-05,
      "loss": 0.1975,
      "step": 7269
    },
    {
      "epoch": 2.6846381093057605,
      "grad_norm": 0.269345223903656,
      "learning_rate": 2.1037073531223057e-05,
      "loss": 0.1854,
      "step": 7270
    },
    {
      "epoch": 2.685007385524372,
      "grad_norm": 0.26761114597320557,
      "learning_rate": 2.1012439955659565e-05,
      "loss": 0.1706,
      "step": 7271
    },
    {
      "epoch": 2.6853766617429837,
      "grad_norm": 0.25350290536880493,
      "learning_rate": 2.0987806380096073e-05,
      "loss": 0.1604,
      "step": 7272
    },
    {
      "epoch": 2.6857459379615953,
      "grad_norm": 0.25855568051338196,
      "learning_rate": 2.096317280453258e-05,
      "loss": 0.1843,
      "step": 7273
    },
    {
      "epoch": 2.686115214180207,
      "grad_norm": 0.26478880643844604,
      "learning_rate": 2.0938539228969085e-05,
      "loss": 0.1578,
      "step": 7274
    },
    {
      "epoch": 2.6864844903988185,
      "grad_norm": 0.2511434853076935,
      "learning_rate": 2.0913905653405593e-05,
      "loss": 0.1706,
      "step": 7275
    },
    {
      "epoch": 2.6868537666174297,
      "grad_norm": 0.2608022689819336,
      "learning_rate": 2.08892720778421e-05,
      "loss": 0.1684,
      "step": 7276
    },
    {
      "epoch": 2.6872230428360413,
      "grad_norm": 0.2644890248775482,
      "learning_rate": 2.086463850227861e-05,
      "loss": 0.1594,
      "step": 7277
    },
    {
      "epoch": 2.687592319054653,
      "grad_norm": 0.31899401545524597,
      "learning_rate": 2.0840004926715113e-05,
      "loss": 0.1843,
      "step": 7278
    },
    {
      "epoch": 2.6879615952732645,
      "grad_norm": 0.2581588923931122,
      "learning_rate": 2.081537135115162e-05,
      "loss": 0.1698,
      "step": 7279
    },
    {
      "epoch": 2.6883308714918757,
      "grad_norm": 0.30643972754478455,
      "learning_rate": 2.079073777558813e-05,
      "loss": 0.1924,
      "step": 7280
    },
    {
      "epoch": 2.6887001477104873,
      "grad_norm": 0.24889962375164032,
      "learning_rate": 2.0766104200024637e-05,
      "loss": 0.1481,
      "step": 7281
    },
    {
      "epoch": 2.689069423929099,
      "grad_norm": 0.2676466107368469,
      "learning_rate": 2.074147062446114e-05,
      "loss": 0.1592,
      "step": 7282
    },
    {
      "epoch": 2.6894387001477105,
      "grad_norm": 0.2941732108592987,
      "learning_rate": 2.071683704889765e-05,
      "loss": 0.1657,
      "step": 7283
    },
    {
      "epoch": 2.689807976366322,
      "grad_norm": 0.3045092225074768,
      "learning_rate": 2.0692203473334154e-05,
      "loss": 0.1631,
      "step": 7284
    },
    {
      "epoch": 2.6901772525849337,
      "grad_norm": 0.24443352222442627,
      "learning_rate": 2.066756989777066e-05,
      "loss": 0.1507,
      "step": 7285
    },
    {
      "epoch": 2.6905465288035453,
      "grad_norm": 0.2525525391101837,
      "learning_rate": 2.064293632220717e-05,
      "loss": 0.1475,
      "step": 7286
    },
    {
      "epoch": 2.6909158050221564,
      "grad_norm": 0.26321685314178467,
      "learning_rate": 2.0618302746643674e-05,
      "loss": 0.1668,
      "step": 7287
    },
    {
      "epoch": 2.691285081240768,
      "grad_norm": 0.28742846846580505,
      "learning_rate": 2.0593669171080182e-05,
      "loss": 0.1562,
      "step": 7288
    },
    {
      "epoch": 2.6916543574593796,
      "grad_norm": 0.22838294506072998,
      "learning_rate": 2.056903559551669e-05,
      "loss": 0.1531,
      "step": 7289
    },
    {
      "epoch": 2.6920236336779912,
      "grad_norm": 0.22701244056224823,
      "learning_rate": 2.0544402019953198e-05,
      "loss": 0.1411,
      "step": 7290
    },
    {
      "epoch": 2.6923929098966024,
      "grad_norm": 0.27404719591140747,
      "learning_rate": 2.0519768444389702e-05,
      "loss": 0.1716,
      "step": 7291
    },
    {
      "epoch": 2.692762186115214,
      "grad_norm": 0.25083160400390625,
      "learning_rate": 2.049513486882621e-05,
      "loss": 0.1655,
      "step": 7292
    },
    {
      "epoch": 2.6931314623338256,
      "grad_norm": 0.23635615408420563,
      "learning_rate": 2.0470501293262718e-05,
      "loss": 0.1516,
      "step": 7293
    },
    {
      "epoch": 2.693500738552437,
      "grad_norm": 0.257691890001297,
      "learning_rate": 2.0445867717699226e-05,
      "loss": 0.147,
      "step": 7294
    },
    {
      "epoch": 2.693870014771049,
      "grad_norm": 0.27022585272789,
      "learning_rate": 2.042123414213573e-05,
      "loss": 0.1873,
      "step": 7295
    },
    {
      "epoch": 2.6942392909896604,
      "grad_norm": 0.2678462564945221,
      "learning_rate": 2.0396600566572238e-05,
      "loss": 0.1589,
      "step": 7296
    },
    {
      "epoch": 2.694608567208272,
      "grad_norm": 0.2651064991950989,
      "learning_rate": 2.0371966991008746e-05,
      "loss": 0.1668,
      "step": 7297
    },
    {
      "epoch": 2.694977843426883,
      "grad_norm": 0.28784722089767456,
      "learning_rate": 2.0347333415445254e-05,
      "loss": 0.1801,
      "step": 7298
    },
    {
      "epoch": 2.695347119645495,
      "grad_norm": 0.2588948607444763,
      "learning_rate": 2.0322699839881758e-05,
      "loss": 0.1739,
      "step": 7299
    },
    {
      "epoch": 2.6957163958641064,
      "grad_norm": 0.21159175038337708,
      "learning_rate": 2.0298066264318266e-05,
      "loss": 0.1383,
      "step": 7300
    },
    {
      "epoch": 2.6957163958641064,
      "eval_loss": 0.2470039576292038,
      "eval_runtime": 5.8534,
      "eval_samples_per_second": 8.542,
      "eval_steps_per_second": 1.196,
      "step": 7300
    },
    {
      "epoch": 2.696085672082718,
      "grad_norm": 0.2615770399570465,
      "learning_rate": 2.0273432688754774e-05,
      "loss": 0.1607,
      "step": 7301
    },
    {
      "epoch": 2.696454948301329,
      "grad_norm": 0.2541896402835846,
      "learning_rate": 2.0248799113191282e-05,
      "loss": 0.1464,
      "step": 7302
    },
    {
      "epoch": 2.6968242245199407,
      "grad_norm": 0.22565540671348572,
      "learning_rate": 2.0224165537627786e-05,
      "loss": 0.1452,
      "step": 7303
    },
    {
      "epoch": 2.6971935007385524,
      "grad_norm": 0.22654181718826294,
      "learning_rate": 2.0199531962064294e-05,
      "loss": 0.1684,
      "step": 7304
    },
    {
      "epoch": 2.697562776957164,
      "grad_norm": 0.2730237543582916,
      "learning_rate": 2.0174898386500802e-05,
      "loss": 0.1788,
      "step": 7305
    },
    {
      "epoch": 2.6979320531757756,
      "grad_norm": 0.24497225880622864,
      "learning_rate": 2.015026481093731e-05,
      "loss": 0.1749,
      "step": 7306
    },
    {
      "epoch": 2.698301329394387,
      "grad_norm": 0.27761363983154297,
      "learning_rate": 2.0125631235373814e-05,
      "loss": 0.1719,
      "step": 7307
    },
    {
      "epoch": 2.6986706056129988,
      "grad_norm": 0.25468045473098755,
      "learning_rate": 2.0100997659810322e-05,
      "loss": 0.175,
      "step": 7308
    },
    {
      "epoch": 2.69903988183161,
      "grad_norm": 0.23648743331432343,
      "learning_rate": 2.007636408424683e-05,
      "loss": 0.1639,
      "step": 7309
    },
    {
      "epoch": 2.6994091580502215,
      "grad_norm": 0.25075390934944153,
      "learning_rate": 2.0051730508683338e-05,
      "loss": 0.1551,
      "step": 7310
    },
    {
      "epoch": 2.699778434268833,
      "grad_norm": 0.2640094459056854,
      "learning_rate": 2.0027096933119842e-05,
      "loss": 0.1693,
      "step": 7311
    },
    {
      "epoch": 2.7001477104874447,
      "grad_norm": 0.2702077627182007,
      "learning_rate": 2.000246335755635e-05,
      "loss": 0.1599,
      "step": 7312
    },
    {
      "epoch": 2.700516986706056,
      "grad_norm": 0.25153788924217224,
      "learning_rate": 1.9977829781992858e-05,
      "loss": 0.1471,
      "step": 7313
    },
    {
      "epoch": 2.7008862629246675,
      "grad_norm": 0.2792302668094635,
      "learning_rate": 1.9953196206429366e-05,
      "loss": 0.1658,
      "step": 7314
    },
    {
      "epoch": 2.701255539143279,
      "grad_norm": 0.2735605239868164,
      "learning_rate": 1.992856263086587e-05,
      "loss": 0.1621,
      "step": 7315
    },
    {
      "epoch": 2.7016248153618907,
      "grad_norm": 0.3304271697998047,
      "learning_rate": 1.9903929055302378e-05,
      "loss": 0.1854,
      "step": 7316
    },
    {
      "epoch": 2.7019940915805023,
      "grad_norm": 0.31572744250297546,
      "learning_rate": 1.9879295479738886e-05,
      "loss": 0.1842,
      "step": 7317
    },
    {
      "epoch": 2.702363367799114,
      "grad_norm": 0.28819313645362854,
      "learning_rate": 1.9854661904175394e-05,
      "loss": 0.181,
      "step": 7318
    },
    {
      "epoch": 2.7027326440177255,
      "grad_norm": 0.20998355746269226,
      "learning_rate": 1.98300283286119e-05,
      "loss": 0.1389,
      "step": 7319
    },
    {
      "epoch": 2.7031019202363367,
      "grad_norm": 0.2347504198551178,
      "learning_rate": 1.9805394753048406e-05,
      "loss": 0.1474,
      "step": 7320
    },
    {
      "epoch": 2.7034711964549483,
      "grad_norm": 0.271361768245697,
      "learning_rate": 1.9780761177484914e-05,
      "loss": 0.1576,
      "step": 7321
    },
    {
      "epoch": 2.70384047267356,
      "grad_norm": 0.26428288221359253,
      "learning_rate": 1.975612760192142e-05,
      "loss": 0.1707,
      "step": 7322
    },
    {
      "epoch": 2.7042097488921715,
      "grad_norm": 0.2701990604400635,
      "learning_rate": 1.9731494026357927e-05,
      "loss": 0.1931,
      "step": 7323
    },
    {
      "epoch": 2.7045790251107826,
      "grad_norm": 0.2151554822921753,
      "learning_rate": 1.9706860450794434e-05,
      "loss": 0.1538,
      "step": 7324
    },
    {
      "epoch": 2.7049483013293942,
      "grad_norm": 0.4270957410335541,
      "learning_rate": 1.9682226875230942e-05,
      "loss": 0.1852,
      "step": 7325
    },
    {
      "epoch": 2.705317577548006,
      "grad_norm": 0.26941752433776855,
      "learning_rate": 1.9657593299667447e-05,
      "loss": 0.1573,
      "step": 7326
    },
    {
      "epoch": 2.7056868537666174,
      "grad_norm": 0.30312639474868774,
      "learning_rate": 1.9632959724103955e-05,
      "loss": 0.1752,
      "step": 7327
    },
    {
      "epoch": 2.706056129985229,
      "grad_norm": 0.436477392911911,
      "learning_rate": 1.9608326148540462e-05,
      "loss": 0.1593,
      "step": 7328
    },
    {
      "epoch": 2.7064254062038406,
      "grad_norm": 0.2763493061065674,
      "learning_rate": 1.958369257297697e-05,
      "loss": 0.1656,
      "step": 7329
    },
    {
      "epoch": 2.7067946824224522,
      "grad_norm": 0.30371522903442383,
      "learning_rate": 1.9559058997413475e-05,
      "loss": 0.1905,
      "step": 7330
    },
    {
      "epoch": 2.7071639586410634,
      "grad_norm": 0.30406200885772705,
      "learning_rate": 1.9534425421849983e-05,
      "loss": 0.1813,
      "step": 7331
    },
    {
      "epoch": 2.707533234859675,
      "grad_norm": 0.2895963191986084,
      "learning_rate": 1.950979184628649e-05,
      "loss": 0.1681,
      "step": 7332
    },
    {
      "epoch": 2.7079025110782866,
      "grad_norm": 0.23385562002658844,
      "learning_rate": 1.9485158270723e-05,
      "loss": 0.1351,
      "step": 7333
    },
    {
      "epoch": 2.708271787296898,
      "grad_norm": 0.2363734096288681,
      "learning_rate": 1.9460524695159503e-05,
      "loss": 0.1661,
      "step": 7334
    },
    {
      "epoch": 2.7086410635155094,
      "grad_norm": 0.2647867798805237,
      "learning_rate": 1.943589111959601e-05,
      "loss": 0.1799,
      "step": 7335
    },
    {
      "epoch": 2.709010339734121,
      "grad_norm": 0.25815582275390625,
      "learning_rate": 1.941125754403252e-05,
      "loss": 0.1675,
      "step": 7336
    },
    {
      "epoch": 2.7093796159527326,
      "grad_norm": 0.31180018186569214,
      "learning_rate": 1.9386623968469026e-05,
      "loss": 0.1898,
      "step": 7337
    },
    {
      "epoch": 2.709748892171344,
      "grad_norm": 0.29835644364356995,
      "learning_rate": 1.936199039290553e-05,
      "loss": 0.1743,
      "step": 7338
    },
    {
      "epoch": 2.710118168389956,
      "grad_norm": 0.247036874294281,
      "learning_rate": 1.933735681734204e-05,
      "loss": 0.1429,
      "step": 7339
    },
    {
      "epoch": 2.7104874446085674,
      "grad_norm": 0.22366099059581757,
      "learning_rate": 1.9312723241778547e-05,
      "loss": 0.1471,
      "step": 7340
    },
    {
      "epoch": 2.710856720827179,
      "grad_norm": 0.2728867828845978,
      "learning_rate": 1.9288089666215054e-05,
      "loss": 0.1505,
      "step": 7341
    },
    {
      "epoch": 2.71122599704579,
      "grad_norm": 0.27170485258102417,
      "learning_rate": 1.926345609065156e-05,
      "loss": 0.1776,
      "step": 7342
    },
    {
      "epoch": 2.7115952732644018,
      "grad_norm": 0.22643336653709412,
      "learning_rate": 1.9238822515088063e-05,
      "loss": 0.1323,
      "step": 7343
    },
    {
      "epoch": 2.7119645494830134,
      "grad_norm": 0.3507428765296936,
      "learning_rate": 1.921418893952457e-05,
      "loss": 0.1854,
      "step": 7344
    },
    {
      "epoch": 2.712333825701625,
      "grad_norm": 0.2567720413208008,
      "learning_rate": 1.918955536396108e-05,
      "loss": 0.1701,
      "step": 7345
    },
    {
      "epoch": 2.712703101920236,
      "grad_norm": 0.280446320772171,
      "learning_rate": 1.9164921788397587e-05,
      "loss": 0.1639,
      "step": 7346
    },
    {
      "epoch": 2.7130723781388477,
      "grad_norm": 0.23421820998191833,
      "learning_rate": 1.914028821283409e-05,
      "loss": 0.1595,
      "step": 7347
    },
    {
      "epoch": 2.7134416543574593,
      "grad_norm": 0.2823607325553894,
      "learning_rate": 1.91156546372706e-05,
      "loss": 0.1668,
      "step": 7348
    },
    {
      "epoch": 2.713810930576071,
      "grad_norm": 0.28469452261924744,
      "learning_rate": 1.9091021061707107e-05,
      "loss": 0.1685,
      "step": 7349
    },
    {
      "epoch": 2.7141802067946825,
      "grad_norm": 0.31783851981163025,
      "learning_rate": 1.9066387486143615e-05,
      "loss": 0.1937,
      "step": 7350
    },
    {
      "epoch": 2.7141802067946825,
      "eval_loss": 0.24873405694961548,
      "eval_runtime": 5.854,
      "eval_samples_per_second": 8.541,
      "eval_steps_per_second": 1.196,
      "step": 7350
    },
    {
      "epoch": 2.714549483013294,
      "grad_norm": 0.30338403582572937,
      "learning_rate": 1.904175391058012e-05,
      "loss": 0.1554,
      "step": 7351
    },
    {
      "epoch": 2.7149187592319057,
      "grad_norm": 0.2567841708660126,
      "learning_rate": 1.9017120335016627e-05,
      "loss": 0.1511,
      "step": 7352
    },
    {
      "epoch": 2.715288035450517,
      "grad_norm": 0.2727099657058716,
      "learning_rate": 1.8992486759453135e-05,
      "loss": 0.2034,
      "step": 7353
    },
    {
      "epoch": 2.7156573116691285,
      "grad_norm": 0.27551233768463135,
      "learning_rate": 1.8967853183889643e-05,
      "loss": 0.1578,
      "step": 7354
    },
    {
      "epoch": 2.71602658788774,
      "grad_norm": 0.25147584080696106,
      "learning_rate": 1.8943219608326148e-05,
      "loss": 0.1446,
      "step": 7355
    },
    {
      "epoch": 2.7163958641063517,
      "grad_norm": 0.27308040857315063,
      "learning_rate": 1.8918586032762656e-05,
      "loss": 0.159,
      "step": 7356
    },
    {
      "epoch": 2.716765140324963,
      "grad_norm": 0.30654796957969666,
      "learning_rate": 1.8893952457199163e-05,
      "loss": 0.1725,
      "step": 7357
    },
    {
      "epoch": 2.7171344165435745,
      "grad_norm": 0.4544277489185333,
      "learning_rate": 1.886931888163567e-05,
      "loss": 0.2442,
      "step": 7358
    },
    {
      "epoch": 2.717503692762186,
      "grad_norm": 0.3067072331905365,
      "learning_rate": 1.8844685306072176e-05,
      "loss": 0.1859,
      "step": 7359
    },
    {
      "epoch": 2.7178729689807977,
      "grad_norm": 0.24779309332370758,
      "learning_rate": 1.8820051730508684e-05,
      "loss": 0.1556,
      "step": 7360
    },
    {
      "epoch": 2.7182422451994093,
      "grad_norm": 0.29944664239883423,
      "learning_rate": 1.879541815494519e-05,
      "loss": 0.1543,
      "step": 7361
    },
    {
      "epoch": 2.718611521418021,
      "grad_norm": 0.2566697597503662,
      "learning_rate": 1.87707845793817e-05,
      "loss": 0.176,
      "step": 7362
    },
    {
      "epoch": 2.7189807976366325,
      "grad_norm": 0.26690617203712463,
      "learning_rate": 1.8746151003818204e-05,
      "loss": 0.1493,
      "step": 7363
    },
    {
      "epoch": 2.7193500738552436,
      "grad_norm": 0.25145423412323,
      "learning_rate": 1.872151742825471e-05,
      "loss": 0.1742,
      "step": 7364
    },
    {
      "epoch": 2.7197193500738552,
      "grad_norm": 0.258533775806427,
      "learning_rate": 1.869688385269122e-05,
      "loss": 0.172,
      "step": 7365
    },
    {
      "epoch": 2.720088626292467,
      "grad_norm": 0.24946877360343933,
      "learning_rate": 1.8672250277127727e-05,
      "loss": 0.1633,
      "step": 7366
    },
    {
      "epoch": 2.7204579025110784,
      "grad_norm": 0.2537067234516144,
      "learning_rate": 1.8647616701564232e-05,
      "loss": 0.1564,
      "step": 7367
    },
    {
      "epoch": 2.7208271787296896,
      "grad_norm": 0.3265269994735718,
      "learning_rate": 1.862298312600074e-05,
      "loss": 0.1767,
      "step": 7368
    },
    {
      "epoch": 2.721196454948301,
      "grad_norm": 0.3031497299671173,
      "learning_rate": 1.8598349550437248e-05,
      "loss": 0.1716,
      "step": 7369
    },
    {
      "epoch": 2.721565731166913,
      "grad_norm": 0.2572455108165741,
      "learning_rate": 1.8573715974873755e-05,
      "loss": 0.1475,
      "step": 7370
    },
    {
      "epoch": 2.7219350073855244,
      "grad_norm": 0.25037527084350586,
      "learning_rate": 1.854908239931026e-05,
      "loss": 0.161,
      "step": 7371
    },
    {
      "epoch": 2.722304283604136,
      "grad_norm": 0.3232778310775757,
      "learning_rate": 1.8524448823746768e-05,
      "loss": 0.1806,
      "step": 7372
    },
    {
      "epoch": 2.7226735598227476,
      "grad_norm": 0.2545141875743866,
      "learning_rate": 1.8499815248183276e-05,
      "loss": 0.1484,
      "step": 7373
    },
    {
      "epoch": 2.7230428360413588,
      "grad_norm": 0.2592243552207947,
      "learning_rate": 1.8475181672619783e-05,
      "loss": 0.1487,
      "step": 7374
    },
    {
      "epoch": 2.7234121122599704,
      "grad_norm": 0.29469770193099976,
      "learning_rate": 1.8450548097056288e-05,
      "loss": 0.1554,
      "step": 7375
    },
    {
      "epoch": 2.723781388478582,
      "grad_norm": 0.27507486939430237,
      "learning_rate": 1.8425914521492796e-05,
      "loss": 0.1729,
      "step": 7376
    },
    {
      "epoch": 2.7241506646971936,
      "grad_norm": 0.30150166153907776,
      "learning_rate": 1.8401280945929304e-05,
      "loss": 0.1543,
      "step": 7377
    },
    {
      "epoch": 2.724519940915805,
      "grad_norm": 0.27005499601364136,
      "learning_rate": 1.8376647370365808e-05,
      "loss": 0.1503,
      "step": 7378
    },
    {
      "epoch": 2.7248892171344163,
      "grad_norm": 0.2621898651123047,
      "learning_rate": 1.8352013794802316e-05,
      "loss": 0.1894,
      "step": 7379
    },
    {
      "epoch": 2.725258493353028,
      "grad_norm": 0.2828858196735382,
      "learning_rate": 1.8327380219238824e-05,
      "loss": 0.1733,
      "step": 7380
    },
    {
      "epoch": 2.7256277695716395,
      "grad_norm": 0.27500414848327637,
      "learning_rate": 1.8302746643675332e-05,
      "loss": 0.214,
      "step": 7381
    },
    {
      "epoch": 2.725997045790251,
      "grad_norm": 0.2737962305545807,
      "learning_rate": 1.8278113068111836e-05,
      "loss": 0.1658,
      "step": 7382
    },
    {
      "epoch": 2.7263663220088628,
      "grad_norm": 0.22681257128715515,
      "learning_rate": 1.8253479492548344e-05,
      "loss": 0.1812,
      "step": 7383
    },
    {
      "epoch": 2.7267355982274744,
      "grad_norm": 0.2882000505924225,
      "learning_rate": 1.8228845916984852e-05,
      "loss": 0.1827,
      "step": 7384
    },
    {
      "epoch": 2.7271048744460855,
      "grad_norm": 0.23809807002544403,
      "learning_rate": 1.820421234142136e-05,
      "loss": 0.1305,
      "step": 7385
    },
    {
      "epoch": 2.727474150664697,
      "grad_norm": 0.298092246055603,
      "learning_rate": 1.8179578765857864e-05,
      "loss": 0.1688,
      "step": 7386
    },
    {
      "epoch": 2.7278434268833087,
      "grad_norm": 0.2639222741127014,
      "learning_rate": 1.8154945190294372e-05,
      "loss": 0.1739,
      "step": 7387
    },
    {
      "epoch": 2.7282127031019203,
      "grad_norm": 0.2812730073928833,
      "learning_rate": 1.813031161473088e-05,
      "loss": 0.1453,
      "step": 7388
    },
    {
      "epoch": 2.7285819793205315,
      "grad_norm": 0.23684032261371613,
      "learning_rate": 1.8105678039167388e-05,
      "loss": 0.1694,
      "step": 7389
    },
    {
      "epoch": 2.728951255539143,
      "grad_norm": 0.2545001208782196,
      "learning_rate": 1.8081044463603892e-05,
      "loss": 0.1579,
      "step": 7390
    },
    {
      "epoch": 2.7293205317577547,
      "grad_norm": 0.3009130656719208,
      "learning_rate": 1.80564108880404e-05,
      "loss": 0.1695,
      "step": 7391
    },
    {
      "epoch": 2.7296898079763663,
      "grad_norm": 0.2816237509250641,
      "learning_rate": 1.8031777312476908e-05,
      "loss": 0.1671,
      "step": 7392
    },
    {
      "epoch": 2.730059084194978,
      "grad_norm": 0.23764042556285858,
      "learning_rate": 1.8007143736913416e-05,
      "loss": 0.1511,
      "step": 7393
    },
    {
      "epoch": 2.7304283604135895,
      "grad_norm": 0.3010806143283844,
      "learning_rate": 1.798251016134992e-05,
      "loss": 0.1841,
      "step": 7394
    },
    {
      "epoch": 2.730797636632201,
      "grad_norm": 0.29286661744117737,
      "learning_rate": 1.7957876585786428e-05,
      "loss": 0.1628,
      "step": 7395
    },
    {
      "epoch": 2.7311669128508123,
      "grad_norm": 0.25835737586021423,
      "learning_rate": 1.7933243010222936e-05,
      "loss": 0.1481,
      "step": 7396
    },
    {
      "epoch": 2.731536189069424,
      "grad_norm": 0.29753971099853516,
      "learning_rate": 1.7908609434659444e-05,
      "loss": 0.1715,
      "step": 7397
    },
    {
      "epoch": 2.7319054652880355,
      "grad_norm": 0.3576243817806244,
      "learning_rate": 1.788397585909595e-05,
      "loss": 0.1985,
      "step": 7398
    },
    {
      "epoch": 2.732274741506647,
      "grad_norm": 0.2470478117465973,
      "learning_rate": 1.7859342283532456e-05,
      "loss": 0.1491,
      "step": 7399
    },
    {
      "epoch": 2.7326440177252582,
      "grad_norm": 0.31921714544296265,
      "learning_rate": 1.783470870796896e-05,
      "loss": 0.1799,
      "step": 7400
    },
    {
      "epoch": 2.7326440177252582,
      "eval_loss": 0.24810636043548584,
      "eval_runtime": 5.8585,
      "eval_samples_per_second": 8.535,
      "eval_steps_per_second": 1.195,
      "step": 7400
    },
    {
      "epoch": 2.73301329394387,
      "grad_norm": 0.23354244232177734,
      "learning_rate": 1.781007513240547e-05,
      "loss": 0.1468,
      "step": 7401
    },
    {
      "epoch": 2.7333825701624814,
      "grad_norm": 0.2450440526008606,
      "learning_rate": 1.7785441556841977e-05,
      "loss": 0.1508,
      "step": 7402
    },
    {
      "epoch": 2.733751846381093,
      "grad_norm": 0.27826738357543945,
      "learning_rate": 1.776080798127848e-05,
      "loss": 0.1648,
      "step": 7403
    },
    {
      "epoch": 2.7341211225997046,
      "grad_norm": 0.28776785731315613,
      "learning_rate": 1.773617440571499e-05,
      "loss": 0.1778,
      "step": 7404
    },
    {
      "epoch": 2.7344903988183162,
      "grad_norm": 0.29564327001571655,
      "learning_rate": 1.7711540830151497e-05,
      "loss": 0.1558,
      "step": 7405
    },
    {
      "epoch": 2.734859675036928,
      "grad_norm": 0.2605178952217102,
      "learning_rate": 1.7686907254588005e-05,
      "loss": 0.1625,
      "step": 7406
    },
    {
      "epoch": 2.735228951255539,
      "grad_norm": 0.3336915969848633,
      "learning_rate": 1.766227367902451e-05,
      "loss": 0.1909,
      "step": 7407
    },
    {
      "epoch": 2.7355982274741506,
      "grad_norm": 0.2898995578289032,
      "learning_rate": 1.7637640103461017e-05,
      "loss": 0.1698,
      "step": 7408
    },
    {
      "epoch": 2.735967503692762,
      "grad_norm": 0.30992692708969116,
      "learning_rate": 1.7613006527897525e-05,
      "loss": 0.175,
      "step": 7409
    },
    {
      "epoch": 2.736336779911374,
      "grad_norm": 0.23993481695652008,
      "learning_rate": 1.7588372952334033e-05,
      "loss": 0.1604,
      "step": 7410
    },
    {
      "epoch": 2.736706056129985,
      "grad_norm": 0.2633693516254425,
      "learning_rate": 1.7563739376770537e-05,
      "loss": 0.1417,
      "step": 7411
    },
    {
      "epoch": 2.7370753323485966,
      "grad_norm": 0.30885404348373413,
      "learning_rate": 1.7539105801207045e-05,
      "loss": 0.1609,
      "step": 7412
    },
    {
      "epoch": 2.737444608567208,
      "grad_norm": 0.3195990324020386,
      "learning_rate": 1.7514472225643553e-05,
      "loss": 0.179,
      "step": 7413
    },
    {
      "epoch": 2.7378138847858198,
      "grad_norm": 0.2127685546875,
      "learning_rate": 1.748983865008006e-05,
      "loss": 0.1614,
      "step": 7414
    },
    {
      "epoch": 2.7381831610044314,
      "grad_norm": 0.2454635202884674,
      "learning_rate": 1.7465205074516565e-05,
      "loss": 0.1538,
      "step": 7415
    },
    {
      "epoch": 2.738552437223043,
      "grad_norm": 0.265864759683609,
      "learning_rate": 1.7440571498953073e-05,
      "loss": 0.162,
      "step": 7416
    },
    {
      "epoch": 2.7389217134416546,
      "grad_norm": 0.2266344577074051,
      "learning_rate": 1.741593792338958e-05,
      "loss": 0.1592,
      "step": 7417
    },
    {
      "epoch": 2.7392909896602657,
      "grad_norm": 0.28104716539382935,
      "learning_rate": 1.739130434782609e-05,
      "loss": 0.1689,
      "step": 7418
    },
    {
      "epoch": 2.7396602658788773,
      "grad_norm": 0.2466869205236435,
      "learning_rate": 1.7366670772262593e-05,
      "loss": 0.15,
      "step": 7419
    },
    {
      "epoch": 2.740029542097489,
      "grad_norm": 0.2613917589187622,
      "learning_rate": 1.73420371966991e-05,
      "loss": 0.1626,
      "step": 7420
    },
    {
      "epoch": 2.7403988183161005,
      "grad_norm": 0.3566192388534546,
      "learning_rate": 1.731740362113561e-05,
      "loss": 0.1522,
      "step": 7421
    },
    {
      "epoch": 2.7407680945347117,
      "grad_norm": 0.3045487701892853,
      "learning_rate": 1.7292770045572117e-05,
      "loss": 0.1572,
      "step": 7422
    },
    {
      "epoch": 2.7411373707533233,
      "grad_norm": 0.2889017164707184,
      "learning_rate": 1.726813647000862e-05,
      "loss": 0.1947,
      "step": 7423
    },
    {
      "epoch": 2.741506646971935,
      "grad_norm": 0.2684076428413391,
      "learning_rate": 1.724350289444513e-05,
      "loss": 0.1695,
      "step": 7424
    },
    {
      "epoch": 2.7418759231905465,
      "grad_norm": 0.3007463812828064,
      "learning_rate": 1.7218869318881637e-05,
      "loss": 0.1749,
      "step": 7425
    },
    {
      "epoch": 2.742245199409158,
      "grad_norm": 0.28021693229675293,
      "learning_rate": 1.7194235743318145e-05,
      "loss": 0.163,
      "step": 7426
    },
    {
      "epoch": 2.7426144756277697,
      "grad_norm": 0.32350239157676697,
      "learning_rate": 1.716960216775465e-05,
      "loss": 0.1792,
      "step": 7427
    },
    {
      "epoch": 2.7429837518463813,
      "grad_norm": 0.23683792352676392,
      "learning_rate": 1.7144968592191157e-05,
      "loss": 0.1555,
      "step": 7428
    },
    {
      "epoch": 2.7433530280649925,
      "grad_norm": 0.31598547101020813,
      "learning_rate": 1.7120335016627665e-05,
      "loss": 0.1817,
      "step": 7429
    },
    {
      "epoch": 2.743722304283604,
      "grad_norm": 0.29410770535469055,
      "learning_rate": 1.7095701441064173e-05,
      "loss": 0.1754,
      "step": 7430
    },
    {
      "epoch": 2.7440915805022157,
      "grad_norm": 0.3389756381511688,
      "learning_rate": 1.7071067865500677e-05,
      "loss": 0.192,
      "step": 7431
    },
    {
      "epoch": 2.7444608567208273,
      "grad_norm": 0.41600510478019714,
      "learning_rate": 1.7046434289937185e-05,
      "loss": 0.2017,
      "step": 7432
    },
    {
      "epoch": 2.7448301329394384,
      "grad_norm": 0.25756680965423584,
      "learning_rate": 1.7021800714373693e-05,
      "loss": 0.1537,
      "step": 7433
    },
    {
      "epoch": 2.74519940915805,
      "grad_norm": 0.2631795108318329,
      "learning_rate": 1.69971671388102e-05,
      "loss": 0.1586,
      "step": 7434
    },
    {
      "epoch": 2.7455686853766617,
      "grad_norm": 0.2646562159061432,
      "learning_rate": 1.6972533563246706e-05,
      "loss": 0.1676,
      "step": 7435
    },
    {
      "epoch": 2.7459379615952733,
      "grad_norm": 0.2485722452402115,
      "learning_rate": 1.6947899987683213e-05,
      "loss": 0.1466,
      "step": 7436
    },
    {
      "epoch": 2.746307237813885,
      "grad_norm": 0.28804153203964233,
      "learning_rate": 1.692326641211972e-05,
      "loss": 0.1761,
      "step": 7437
    },
    {
      "epoch": 2.7466765140324965,
      "grad_norm": 0.3488936722278595,
      "learning_rate": 1.6898632836556226e-05,
      "loss": 0.1878,
      "step": 7438
    },
    {
      "epoch": 2.747045790251108,
      "grad_norm": 0.2528108060359955,
      "learning_rate": 1.6873999260992734e-05,
      "loss": 0.1571,
      "step": 7439
    },
    {
      "epoch": 2.7474150664697192,
      "grad_norm": 0.2982749342918396,
      "learning_rate": 1.684936568542924e-05,
      "loss": 0.1861,
      "step": 7440
    },
    {
      "epoch": 2.747784342688331,
      "grad_norm": 0.3304920196533203,
      "learning_rate": 1.682473210986575e-05,
      "loss": 0.1772,
      "step": 7441
    },
    {
      "epoch": 2.7481536189069424,
      "grad_norm": 0.23332808911800385,
      "learning_rate": 1.6800098534302254e-05,
      "loss": 0.1465,
      "step": 7442
    },
    {
      "epoch": 2.748522895125554,
      "grad_norm": 0.24102748930454254,
      "learning_rate": 1.677546495873876e-05,
      "loss": 0.1623,
      "step": 7443
    },
    {
      "epoch": 2.748892171344165,
      "grad_norm": 0.3148341476917267,
      "learning_rate": 1.675083138317527e-05,
      "loss": 0.1796,
      "step": 7444
    },
    {
      "epoch": 2.749261447562777,
      "grad_norm": 0.29295220971107483,
      "learning_rate": 1.6726197807611777e-05,
      "loss": 0.1627,
      "step": 7445
    },
    {
      "epoch": 2.7496307237813884,
      "grad_norm": 0.2994075119495392,
      "learning_rate": 1.6701564232048282e-05,
      "loss": 0.1813,
      "step": 7446
    },
    {
      "epoch": 2.75,
      "grad_norm": 0.2487352341413498,
      "learning_rate": 1.667693065648479e-05,
      "loss": 0.1868,
      "step": 7447
    },
    {
      "epoch": 2.7503692762186116,
      "grad_norm": 0.2617223560810089,
      "learning_rate": 1.6652297080921298e-05,
      "loss": 0.1762,
      "step": 7448
    },
    {
      "epoch": 2.750738552437223,
      "grad_norm": 0.25977933406829834,
      "learning_rate": 1.6627663505357805e-05,
      "loss": 0.1609,
      "step": 7449
    },
    {
      "epoch": 2.751107828655835,
      "grad_norm": 0.30660438537597656,
      "learning_rate": 1.660302992979431e-05,
      "loss": 0.1684,
      "step": 7450
    },
    {
      "epoch": 2.751107828655835,
      "eval_loss": 0.24653255939483643,
      "eval_runtime": 5.8482,
      "eval_samples_per_second": 8.55,
      "eval_steps_per_second": 1.197,
      "step": 7450
    },
    {
      "epoch": 2.751477104874446,
      "grad_norm": 0.27738091349601746,
      "learning_rate": 1.6578396354230818e-05,
      "loss": 0.1523,
      "step": 7451
    },
    {
      "epoch": 2.7518463810930576,
      "grad_norm": 0.3045938313007355,
      "learning_rate": 1.6553762778667326e-05,
      "loss": 0.1769,
      "step": 7452
    },
    {
      "epoch": 2.752215657311669,
      "grad_norm": 0.2948349714279175,
      "learning_rate": 1.6529129203103834e-05,
      "loss": 0.1821,
      "step": 7453
    },
    {
      "epoch": 2.7525849335302808,
      "grad_norm": 0.26535508036613464,
      "learning_rate": 1.6504495627540338e-05,
      "loss": 0.1579,
      "step": 7454
    },
    {
      "epoch": 2.752954209748892,
      "grad_norm": 0.2517685294151306,
      "learning_rate": 1.6479862051976846e-05,
      "loss": 0.1482,
      "step": 7455
    },
    {
      "epoch": 2.7533234859675035,
      "grad_norm": 0.270142525434494,
      "learning_rate": 1.6455228476413354e-05,
      "loss": 0.1756,
      "step": 7456
    },
    {
      "epoch": 2.753692762186115,
      "grad_norm": 0.2596772015094757,
      "learning_rate": 1.643059490084986e-05,
      "loss": 0.142,
      "step": 7457
    },
    {
      "epoch": 2.7540620384047267,
      "grad_norm": 0.28529229760169983,
      "learning_rate": 1.6405961325286366e-05,
      "loss": 0.1777,
      "step": 7458
    },
    {
      "epoch": 2.7544313146233383,
      "grad_norm": 0.23346681892871857,
      "learning_rate": 1.638132774972287e-05,
      "loss": 0.1521,
      "step": 7459
    },
    {
      "epoch": 2.75480059084195,
      "grad_norm": 0.29147687554359436,
      "learning_rate": 1.635669417415938e-05,
      "loss": 0.147,
      "step": 7460
    },
    {
      "epoch": 2.7551698670605616,
      "grad_norm": 0.2928764820098877,
      "learning_rate": 1.6332060598595886e-05,
      "loss": 0.1672,
      "step": 7461
    },
    {
      "epoch": 2.7555391432791727,
      "grad_norm": 0.22330345213413239,
      "learning_rate": 1.6307427023032394e-05,
      "loss": 0.1646,
      "step": 7462
    },
    {
      "epoch": 2.7559084194977843,
      "grad_norm": 0.2907005250453949,
      "learning_rate": 1.62827934474689e-05,
      "loss": 0.1669,
      "step": 7463
    },
    {
      "epoch": 2.756277695716396,
      "grad_norm": 0.2795524299144745,
      "learning_rate": 1.6258159871905406e-05,
      "loss": 0.1631,
      "step": 7464
    },
    {
      "epoch": 2.7566469719350075,
      "grad_norm": 0.31589746475219727,
      "learning_rate": 1.6233526296341914e-05,
      "loss": 0.178,
      "step": 7465
    },
    {
      "epoch": 2.7570162481536187,
      "grad_norm": 0.286522775888443,
      "learning_rate": 1.6208892720778422e-05,
      "loss": 0.1748,
      "step": 7466
    },
    {
      "epoch": 2.7573855243722303,
      "grad_norm": 0.3060949742794037,
      "learning_rate": 1.6184259145214927e-05,
      "loss": 0.186,
      "step": 7467
    },
    {
      "epoch": 2.757754800590842,
      "grad_norm": 0.23353999853134155,
      "learning_rate": 1.6159625569651435e-05,
      "loss": 0.1462,
      "step": 7468
    },
    {
      "epoch": 2.7581240768094535,
      "grad_norm": 0.27649784088134766,
      "learning_rate": 1.6134991994087942e-05,
      "loss": 0.1785,
      "step": 7469
    },
    {
      "epoch": 2.758493353028065,
      "grad_norm": 0.21699804067611694,
      "learning_rate": 1.611035841852445e-05,
      "loss": 0.1535,
      "step": 7470
    },
    {
      "epoch": 2.7588626292466767,
      "grad_norm": 0.2576650381088257,
      "learning_rate": 1.6085724842960955e-05,
      "loss": 0.1636,
      "step": 7471
    },
    {
      "epoch": 2.7592319054652883,
      "grad_norm": 0.2834946811199188,
      "learning_rate": 1.6061091267397463e-05,
      "loss": 0.1712,
      "step": 7472
    },
    {
      "epoch": 2.7596011816838995,
      "grad_norm": 0.25979647040367126,
      "learning_rate": 1.603645769183397e-05,
      "loss": 0.155,
      "step": 7473
    },
    {
      "epoch": 2.759970457902511,
      "grad_norm": 0.24807441234588623,
      "learning_rate": 1.601182411627048e-05,
      "loss": 0.1637,
      "step": 7474
    },
    {
      "epoch": 2.7603397341211227,
      "grad_norm": 0.2989766299724579,
      "learning_rate": 1.5987190540706983e-05,
      "loss": 0.1793,
      "step": 7475
    },
    {
      "epoch": 2.7607090103397343,
      "grad_norm": 0.25783130526542664,
      "learning_rate": 1.596255696514349e-05,
      "loss": 0.1805,
      "step": 7476
    },
    {
      "epoch": 2.7610782865583454,
      "grad_norm": 0.2817245423793793,
      "learning_rate": 1.593792338958e-05,
      "loss": 0.1511,
      "step": 7477
    },
    {
      "epoch": 2.761447562776957,
      "grad_norm": 0.19131001830101013,
      "learning_rate": 1.5913289814016506e-05,
      "loss": 0.1427,
      "step": 7478
    },
    {
      "epoch": 2.7618168389955686,
      "grad_norm": 0.32190608978271484,
      "learning_rate": 1.588865623845301e-05,
      "loss": 0.1695,
      "step": 7479
    },
    {
      "epoch": 2.7621861152141802,
      "grad_norm": 0.2699021100997925,
      "learning_rate": 1.586402266288952e-05,
      "loss": 0.1511,
      "step": 7480
    },
    {
      "epoch": 2.762555391432792,
      "grad_norm": 0.24477988481521606,
      "learning_rate": 1.5839389087326027e-05,
      "loss": 0.1548,
      "step": 7481
    },
    {
      "epoch": 2.7629246676514034,
      "grad_norm": 0.3021427392959595,
      "learning_rate": 1.5814755511762534e-05,
      "loss": 0.17,
      "step": 7482
    },
    {
      "epoch": 2.763293943870015,
      "grad_norm": 0.29378512501716614,
      "learning_rate": 1.579012193619904e-05,
      "loss": 0.1601,
      "step": 7483
    },
    {
      "epoch": 2.763663220088626,
      "grad_norm": 0.24656188488006592,
      "learning_rate": 1.5765488360635547e-05,
      "loss": 0.1388,
      "step": 7484
    },
    {
      "epoch": 2.764032496307238,
      "grad_norm": 0.26288262009620667,
      "learning_rate": 1.5740854785072055e-05,
      "loss": 0.1836,
      "step": 7485
    },
    {
      "epoch": 2.7644017725258494,
      "grad_norm": 0.2798362374305725,
      "learning_rate": 1.5716221209508563e-05,
      "loss": 0.1408,
      "step": 7486
    },
    {
      "epoch": 2.764771048744461,
      "grad_norm": 0.26546165347099304,
      "learning_rate": 1.5691587633945067e-05,
      "loss": 0.1668,
      "step": 7487
    },
    {
      "epoch": 2.765140324963072,
      "grad_norm": 0.28876855969429016,
      "learning_rate": 1.5666954058381575e-05,
      "loss": 0.1567,
      "step": 7488
    },
    {
      "epoch": 2.7655096011816838,
      "grad_norm": 0.31431934237480164,
      "learning_rate": 1.5642320482818083e-05,
      "loss": 0.1786,
      "step": 7489
    },
    {
      "epoch": 2.7658788774002954,
      "grad_norm": 0.24657383561134338,
      "learning_rate": 1.561768690725459e-05,
      "loss": 0.1596,
      "step": 7490
    },
    {
      "epoch": 2.766248153618907,
      "grad_norm": 0.25895577669143677,
      "learning_rate": 1.5593053331691095e-05,
      "loss": 0.1704,
      "step": 7491
    },
    {
      "epoch": 2.7666174298375186,
      "grad_norm": 0.22705091536045074,
      "learning_rate": 1.5568419756127603e-05,
      "loss": 0.1386,
      "step": 7492
    },
    {
      "epoch": 2.76698670605613,
      "grad_norm": 0.30566298961639404,
      "learning_rate": 1.554378618056411e-05,
      "loss": 0.1654,
      "step": 7493
    },
    {
      "epoch": 2.7673559822747418,
      "grad_norm": 0.28241631388664246,
      "learning_rate": 1.5519152605000615e-05,
      "loss": 0.1643,
      "step": 7494
    },
    {
      "epoch": 2.767725258493353,
      "grad_norm": 0.25129958987236023,
      "learning_rate": 1.5494519029437123e-05,
      "loss": 0.1526,
      "step": 7495
    },
    {
      "epoch": 2.7680945347119645,
      "grad_norm": 0.26767459511756897,
      "learning_rate": 1.546988545387363e-05,
      "loss": 0.1447,
      "step": 7496
    },
    {
      "epoch": 2.768463810930576,
      "grad_norm": 0.2640560567378998,
      "learning_rate": 1.544525187831014e-05,
      "loss": 0.1578,
      "step": 7497
    },
    {
      "epoch": 2.7688330871491877,
      "grad_norm": 0.28000009059906006,
      "learning_rate": 1.5420618302746643e-05,
      "loss": 0.1627,
      "step": 7498
    },
    {
      "epoch": 2.769202363367799,
      "grad_norm": 0.3205571472644806,
      "learning_rate": 1.539598472718315e-05,
      "loss": 0.1918,
      "step": 7499
    },
    {
      "epoch": 2.7695716395864105,
      "grad_norm": 0.3204624354839325,
      "learning_rate": 1.537135115161966e-05,
      "loss": 0.1702,
      "step": 7500
    },
    {
      "epoch": 2.7695716395864105,
      "eval_loss": 0.2450689673423767,
      "eval_runtime": 5.8526,
      "eval_samples_per_second": 8.543,
      "eval_steps_per_second": 1.196,
      "step": 7500
    },
    {
      "epoch": 2.769940915805022,
      "grad_norm": 0.28050729632377625,
      "learning_rate": 1.5346717576056167e-05,
      "loss": 0.1719,
      "step": 7501
    },
    {
      "epoch": 2.7703101920236337,
      "grad_norm": 0.29927536845207214,
      "learning_rate": 1.532208400049267e-05,
      "loss": 0.1803,
      "step": 7502
    },
    {
      "epoch": 2.7706794682422453,
      "grad_norm": 0.27131029963493347,
      "learning_rate": 1.529745042492918e-05,
      "loss": 0.1562,
      "step": 7503
    },
    {
      "epoch": 2.771048744460857,
      "grad_norm": 0.2462558150291443,
      "learning_rate": 1.5272816849365687e-05,
      "loss": 0.1732,
      "step": 7504
    },
    {
      "epoch": 2.7714180206794685,
      "grad_norm": 0.3052695393562317,
      "learning_rate": 1.5248183273802193e-05,
      "loss": 0.182,
      "step": 7505
    },
    {
      "epoch": 2.7717872968980797,
      "grad_norm": 0.2922547459602356,
      "learning_rate": 1.5223549698238701e-05,
      "loss": 0.1652,
      "step": 7506
    },
    {
      "epoch": 2.7721565731166913,
      "grad_norm": 0.2786978781223297,
      "learning_rate": 1.5198916122675207e-05,
      "loss": 0.168,
      "step": 7507
    },
    {
      "epoch": 2.772525849335303,
      "grad_norm": 0.3016391694545746,
      "learning_rate": 1.5174282547111715e-05,
      "loss": 0.1726,
      "step": 7508
    },
    {
      "epoch": 2.7728951255539145,
      "grad_norm": 0.32139500975608826,
      "learning_rate": 1.5149648971548221e-05,
      "loss": 0.1961,
      "step": 7509
    },
    {
      "epoch": 2.7732644017725256,
      "grad_norm": 0.27514082193374634,
      "learning_rate": 1.512501539598473e-05,
      "loss": 0.1599,
      "step": 7510
    },
    {
      "epoch": 2.7736336779911372,
      "grad_norm": 0.24459095299243927,
      "learning_rate": 1.5100381820421235e-05,
      "loss": 0.1398,
      "step": 7511
    },
    {
      "epoch": 2.774002954209749,
      "grad_norm": 0.25074049830436707,
      "learning_rate": 1.5075748244857743e-05,
      "loss": 0.146,
      "step": 7512
    },
    {
      "epoch": 2.7743722304283605,
      "grad_norm": 0.2813052237033844,
      "learning_rate": 1.505111466929425e-05,
      "loss": 0.1514,
      "step": 7513
    },
    {
      "epoch": 2.774741506646972,
      "grad_norm": 0.29530805349349976,
      "learning_rate": 1.5026481093730757e-05,
      "loss": 0.1828,
      "step": 7514
    },
    {
      "epoch": 2.7751107828655837,
      "grad_norm": 0.2728615999221802,
      "learning_rate": 1.5001847518167263e-05,
      "loss": 0.1762,
      "step": 7515
    },
    {
      "epoch": 2.775480059084195,
      "grad_norm": 0.32301032543182373,
      "learning_rate": 1.4977213942603768e-05,
      "loss": 0.1744,
      "step": 7516
    },
    {
      "epoch": 2.7758493353028064,
      "grad_norm": 0.25327804684638977,
      "learning_rate": 1.4952580367040276e-05,
      "loss": 0.1542,
      "step": 7517
    },
    {
      "epoch": 2.776218611521418,
      "grad_norm": 0.3018170893192291,
      "learning_rate": 1.4927946791476782e-05,
      "loss": 0.1668,
      "step": 7518
    },
    {
      "epoch": 2.7765878877400296,
      "grad_norm": 0.24490094184875488,
      "learning_rate": 1.490331321591329e-05,
      "loss": 0.1478,
      "step": 7519
    },
    {
      "epoch": 2.7769571639586412,
      "grad_norm": 0.24875818192958832,
      "learning_rate": 1.4878679640349796e-05,
      "loss": 0.1579,
      "step": 7520
    },
    {
      "epoch": 2.7773264401772524,
      "grad_norm": 0.2581137716770172,
      "learning_rate": 1.4854046064786304e-05,
      "loss": 0.1937,
      "step": 7521
    },
    {
      "epoch": 2.777695716395864,
      "grad_norm": 0.30791181325912476,
      "learning_rate": 1.482941248922281e-05,
      "loss": 0.1687,
      "step": 7522
    },
    {
      "epoch": 2.7780649926144756,
      "grad_norm": 0.24371172487735748,
      "learning_rate": 1.4804778913659318e-05,
      "loss": 0.1589,
      "step": 7523
    },
    {
      "epoch": 2.778434268833087,
      "grad_norm": 0.21186070144176483,
      "learning_rate": 1.4780145338095824e-05,
      "loss": 0.1275,
      "step": 7524
    },
    {
      "epoch": 2.778803545051699,
      "grad_norm": 0.24601298570632935,
      "learning_rate": 1.4755511762532332e-05,
      "loss": 0.1428,
      "step": 7525
    },
    {
      "epoch": 2.7791728212703104,
      "grad_norm": 0.22553080320358276,
      "learning_rate": 1.4730878186968838e-05,
      "loss": 0.1385,
      "step": 7526
    },
    {
      "epoch": 2.7795420974889216,
      "grad_norm": 0.2764257490634918,
      "learning_rate": 1.4706244611405346e-05,
      "loss": 0.1826,
      "step": 7527
    },
    {
      "epoch": 2.779911373707533,
      "grad_norm": 0.2295643836259842,
      "learning_rate": 1.4681611035841852e-05,
      "loss": 0.1393,
      "step": 7528
    },
    {
      "epoch": 2.7802806499261448,
      "grad_norm": 0.25414568185806274,
      "learning_rate": 1.465697746027836e-05,
      "loss": 0.1711,
      "step": 7529
    },
    {
      "epoch": 2.7806499261447564,
      "grad_norm": 0.23428992927074432,
      "learning_rate": 1.4632343884714866e-05,
      "loss": 0.1539,
      "step": 7530
    },
    {
      "epoch": 2.7810192023633675,
      "grad_norm": 0.3167438209056854,
      "learning_rate": 1.4607710309151374e-05,
      "loss": 0.1805,
      "step": 7531
    },
    {
      "epoch": 2.781388478581979,
      "grad_norm": 0.26827964186668396,
      "learning_rate": 1.458307673358788e-05,
      "loss": 0.166,
      "step": 7532
    },
    {
      "epoch": 2.7817577548005907,
      "grad_norm": 0.2907755672931671,
      "learning_rate": 1.4558443158024388e-05,
      "loss": 0.1649,
      "step": 7533
    },
    {
      "epoch": 2.7821270310192023,
      "grad_norm": 0.2963904142379761,
      "learning_rate": 1.4533809582460894e-05,
      "loss": 0.1935,
      "step": 7534
    },
    {
      "epoch": 2.782496307237814,
      "grad_norm": 0.28386786580085754,
      "learning_rate": 1.4509176006897402e-05,
      "loss": 0.1479,
      "step": 7535
    },
    {
      "epoch": 2.7828655834564255,
      "grad_norm": 0.2962789833545685,
      "learning_rate": 1.4484542431333908e-05,
      "loss": 0.2001,
      "step": 7536
    },
    {
      "epoch": 2.783234859675037,
      "grad_norm": 0.23222845792770386,
      "learning_rate": 1.4459908855770416e-05,
      "loss": 0.1295,
      "step": 7537
    },
    {
      "epoch": 2.7836041358936483,
      "grad_norm": 0.3009457290172577,
      "learning_rate": 1.4435275280206922e-05,
      "loss": 0.163,
      "step": 7538
    },
    {
      "epoch": 2.78397341211226,
      "grad_norm": 0.3172808587551117,
      "learning_rate": 1.441064170464343e-05,
      "loss": 0.1642,
      "step": 7539
    },
    {
      "epoch": 2.7843426883308715,
      "grad_norm": 0.21474723517894745,
      "learning_rate": 1.4386008129079936e-05,
      "loss": 0.1401,
      "step": 7540
    },
    {
      "epoch": 2.784711964549483,
      "grad_norm": 0.2675321698188782,
      "learning_rate": 1.4361374553516444e-05,
      "loss": 0.1659,
      "step": 7541
    },
    {
      "epoch": 2.7850812407680943,
      "grad_norm": 0.30473241209983826,
      "learning_rate": 1.433674097795295e-05,
      "loss": 0.1672,
      "step": 7542
    },
    {
      "epoch": 2.785450516986706,
      "grad_norm": 0.24813975393772125,
      "learning_rate": 1.4312107402389458e-05,
      "loss": 0.1626,
      "step": 7543
    },
    {
      "epoch": 2.7858197932053175,
      "grad_norm": 0.2875143587589264,
      "learning_rate": 1.4287473826825964e-05,
      "loss": 0.1772,
      "step": 7544
    },
    {
      "epoch": 2.786189069423929,
      "grad_norm": 0.2905154824256897,
      "learning_rate": 1.4262840251262472e-05,
      "loss": 0.1586,
      "step": 7545
    },
    {
      "epoch": 2.7865583456425407,
      "grad_norm": 0.28693145513534546,
      "learning_rate": 1.4238206675698978e-05,
      "loss": 0.1635,
      "step": 7546
    },
    {
      "epoch": 2.7869276218611523,
      "grad_norm": 0.27007073163986206,
      "learning_rate": 1.4213573100135486e-05,
      "loss": 0.177,
      "step": 7547
    },
    {
      "epoch": 2.787296898079764,
      "grad_norm": 0.245378777384758,
      "learning_rate": 1.4188939524571992e-05,
      "loss": 0.1458,
      "step": 7548
    },
    {
      "epoch": 2.787666174298375,
      "grad_norm": 0.25880053639411926,
      "learning_rate": 1.41643059490085e-05,
      "loss": 0.178,
      "step": 7549
    },
    {
      "epoch": 2.7880354505169866,
      "grad_norm": 0.2758273184299469,
      "learning_rate": 1.4139672373445007e-05,
      "loss": 0.1639,
      "step": 7550
    },
    {
      "epoch": 2.7880354505169866,
      "eval_loss": 0.24555173516273499,
      "eval_runtime": 5.8616,
      "eval_samples_per_second": 8.53,
      "eval_steps_per_second": 1.194,
      "step": 7550
    },
    {
      "epoch": 2.7884047267355982,
      "grad_norm": 0.21397241950035095,
      "learning_rate": 1.4115038797881513e-05,
      "loss": 0.1413,
      "step": 7551
    },
    {
      "epoch": 2.78877400295421,
      "grad_norm": 0.3013254404067993,
      "learning_rate": 1.409040522231802e-05,
      "loss": 0.1707,
      "step": 7552
    },
    {
      "epoch": 2.789143279172821,
      "grad_norm": 0.2574707567691803,
      "learning_rate": 1.4065771646754527e-05,
      "loss": 0.1412,
      "step": 7553
    },
    {
      "epoch": 2.7895125553914326,
      "grad_norm": 0.29214608669281006,
      "learning_rate": 1.4041138071191035e-05,
      "loss": 0.1655,
      "step": 7554
    },
    {
      "epoch": 2.789881831610044,
      "grad_norm": 0.2661636769771576,
      "learning_rate": 1.401650449562754e-05,
      "loss": 0.1479,
      "step": 7555
    },
    {
      "epoch": 2.790251107828656,
      "grad_norm": 0.28535255789756775,
      "learning_rate": 1.3991870920064049e-05,
      "loss": 0.189,
      "step": 7556
    },
    {
      "epoch": 2.7906203840472674,
      "grad_norm": 0.28791603446006775,
      "learning_rate": 1.3967237344500555e-05,
      "loss": 0.165,
      "step": 7557
    },
    {
      "epoch": 2.790989660265879,
      "grad_norm": 0.2892257273197174,
      "learning_rate": 1.3942603768937063e-05,
      "loss": 0.1689,
      "step": 7558
    },
    {
      "epoch": 2.7913589364844906,
      "grad_norm": 0.2890913784503937,
      "learning_rate": 1.3917970193373569e-05,
      "loss": 0.2106,
      "step": 7559
    },
    {
      "epoch": 2.791728212703102,
      "grad_norm": 0.24501356482505798,
      "learning_rate": 1.3893336617810077e-05,
      "loss": 0.1439,
      "step": 7560
    },
    {
      "epoch": 2.7920974889217134,
      "grad_norm": 0.24362200498580933,
      "learning_rate": 1.3868703042246583e-05,
      "loss": 0.1476,
      "step": 7561
    },
    {
      "epoch": 2.792466765140325,
      "grad_norm": 0.2666977047920227,
      "learning_rate": 1.384406946668309e-05,
      "loss": 0.1734,
      "step": 7562
    },
    {
      "epoch": 2.7928360413589366,
      "grad_norm": 0.2529878318309784,
      "learning_rate": 1.3819435891119597e-05,
      "loss": 0.1729,
      "step": 7563
    },
    {
      "epoch": 2.7932053175775478,
      "grad_norm": 0.31165093183517456,
      "learning_rate": 1.3794802315556105e-05,
      "loss": 0.1924,
      "step": 7564
    },
    {
      "epoch": 2.7935745937961594,
      "grad_norm": 0.2584229111671448,
      "learning_rate": 1.3770168739992611e-05,
      "loss": 0.1505,
      "step": 7565
    },
    {
      "epoch": 2.793943870014771,
      "grad_norm": 0.23775674402713776,
      "learning_rate": 1.3745535164429119e-05,
      "loss": 0.1284,
      "step": 7566
    },
    {
      "epoch": 2.7943131462333826,
      "grad_norm": 0.2588885724544525,
      "learning_rate": 1.3720901588865625e-05,
      "loss": 0.1629,
      "step": 7567
    },
    {
      "epoch": 2.794682422451994,
      "grad_norm": 0.25051623582839966,
      "learning_rate": 1.3696268013302133e-05,
      "loss": 0.1449,
      "step": 7568
    },
    {
      "epoch": 2.7950516986706058,
      "grad_norm": 0.3355005979537964,
      "learning_rate": 1.3671634437738639e-05,
      "loss": 0.1843,
      "step": 7569
    },
    {
      "epoch": 2.7954209748892174,
      "grad_norm": 0.22919444739818573,
      "learning_rate": 1.3647000862175147e-05,
      "loss": 0.1554,
      "step": 7570
    },
    {
      "epoch": 2.7957902511078285,
      "grad_norm": 0.2616226375102997,
      "learning_rate": 1.3622367286611653e-05,
      "loss": 0.1497,
      "step": 7571
    },
    {
      "epoch": 2.79615952732644,
      "grad_norm": 0.28552570939064026,
      "learning_rate": 1.3597733711048161e-05,
      "loss": 0.1595,
      "step": 7572
    },
    {
      "epoch": 2.7965288035450517,
      "grad_norm": 0.344937801361084,
      "learning_rate": 1.3573100135484667e-05,
      "loss": 0.1861,
      "step": 7573
    },
    {
      "epoch": 2.7968980797636633,
      "grad_norm": 0.23454611003398895,
      "learning_rate": 1.3548466559921172e-05,
      "loss": 0.1539,
      "step": 7574
    },
    {
      "epoch": 2.7972673559822745,
      "grad_norm": 0.2591012120246887,
      "learning_rate": 1.352383298435768e-05,
      "loss": 0.1435,
      "step": 7575
    },
    {
      "epoch": 2.797636632200886,
      "grad_norm": 0.25938424468040466,
      "learning_rate": 1.3499199408794186e-05,
      "loss": 0.1684,
      "step": 7576
    },
    {
      "epoch": 2.7980059084194977,
      "grad_norm": 0.29442277550697327,
      "learning_rate": 1.3474565833230693e-05,
      "loss": 0.1721,
      "step": 7577
    },
    {
      "epoch": 2.7983751846381093,
      "grad_norm": 0.29727548360824585,
      "learning_rate": 1.34499322576672e-05,
      "loss": 0.1818,
      "step": 7578
    },
    {
      "epoch": 2.798744460856721,
      "grad_norm": 0.258197546005249,
      "learning_rate": 1.3425298682103707e-05,
      "loss": 0.1365,
      "step": 7579
    },
    {
      "epoch": 2.7991137370753325,
      "grad_norm": 0.2935611605644226,
      "learning_rate": 1.3400665106540214e-05,
      "loss": 0.1783,
      "step": 7580
    },
    {
      "epoch": 2.799483013293944,
      "grad_norm": 0.3015442490577698,
      "learning_rate": 1.3376031530976721e-05,
      "loss": 0.1652,
      "step": 7581
    },
    {
      "epoch": 2.7998522895125553,
      "grad_norm": 0.24497297406196594,
      "learning_rate": 1.3351397955413228e-05,
      "loss": 0.1634,
      "step": 7582
    },
    {
      "epoch": 2.800221565731167,
      "grad_norm": 0.29756438732147217,
      "learning_rate": 1.3326764379849736e-05,
      "loss": 0.138,
      "step": 7583
    },
    {
      "epoch": 2.8005908419497785,
      "grad_norm": 0.3196985721588135,
      "learning_rate": 1.3302130804286242e-05,
      "loss": 0.1776,
      "step": 7584
    },
    {
      "epoch": 2.80096011816839,
      "grad_norm": 0.33203697204589844,
      "learning_rate": 1.327749722872275e-05,
      "loss": 0.1944,
      "step": 7585
    },
    {
      "epoch": 2.8013293943870012,
      "grad_norm": 0.2631467580795288,
      "learning_rate": 1.3252863653159256e-05,
      "loss": 0.1636,
      "step": 7586
    },
    {
      "epoch": 2.801698670605613,
      "grad_norm": 0.277204692363739,
      "learning_rate": 1.3228230077595764e-05,
      "loss": 0.1718,
      "step": 7587
    },
    {
      "epoch": 2.8020679468242244,
      "grad_norm": 0.30720534920692444,
      "learning_rate": 1.320359650203227e-05,
      "loss": 0.1636,
      "step": 7588
    },
    {
      "epoch": 2.802437223042836,
      "grad_norm": 0.22924008965492249,
      "learning_rate": 1.3178962926468778e-05,
      "loss": 0.1341,
      "step": 7589
    },
    {
      "epoch": 2.8028064992614476,
      "grad_norm": 0.251715749502182,
      "learning_rate": 1.3154329350905284e-05,
      "loss": 0.1614,
      "step": 7590
    },
    {
      "epoch": 2.8031757754800593,
      "grad_norm": 0.26300671696662903,
      "learning_rate": 1.3129695775341792e-05,
      "loss": 0.139,
      "step": 7591
    },
    {
      "epoch": 2.803545051698671,
      "grad_norm": 0.3070409595966339,
      "learning_rate": 1.3105062199778298e-05,
      "loss": 0.2011,
      "step": 7592
    },
    {
      "epoch": 2.803914327917282,
      "grad_norm": 0.2788148522377014,
      "learning_rate": 1.3080428624214806e-05,
      "loss": 0.14,
      "step": 7593
    },
    {
      "epoch": 2.8042836041358936,
      "grad_norm": 0.28043505549430847,
      "learning_rate": 1.3055795048651312e-05,
      "loss": 0.1805,
      "step": 7594
    },
    {
      "epoch": 2.804652880354505,
      "grad_norm": 0.2902803421020508,
      "learning_rate": 1.303116147308782e-05,
      "loss": 0.1716,
      "step": 7595
    },
    {
      "epoch": 2.805022156573117,
      "grad_norm": 0.2920861542224884,
      "learning_rate": 1.3006527897524326e-05,
      "loss": 0.1676,
      "step": 7596
    },
    {
      "epoch": 2.805391432791728,
      "grad_norm": 0.2428940236568451,
      "learning_rate": 1.2981894321960834e-05,
      "loss": 0.1607,
      "step": 7597
    },
    {
      "epoch": 2.8057607090103396,
      "grad_norm": 0.28528156876564026,
      "learning_rate": 1.295726074639734e-05,
      "loss": 0.1677,
      "step": 7598
    },
    {
      "epoch": 2.806129985228951,
      "grad_norm": 0.22906848788261414,
      "learning_rate": 1.2932627170833848e-05,
      "loss": 0.1376,
      "step": 7599
    },
    {
      "epoch": 2.806499261447563,
      "grad_norm": 0.27055057883262634,
      "learning_rate": 1.2907993595270354e-05,
      "loss": 0.1574,
      "step": 7600
    },
    {
      "epoch": 2.806499261447563,
      "eval_loss": 0.2451067417860031,
      "eval_runtime": 5.8595,
      "eval_samples_per_second": 8.533,
      "eval_steps_per_second": 1.195,
      "step": 7600
    },
    {
      "epoch": 2.8068685376661744,
      "grad_norm": 0.2410304844379425,
      "learning_rate": 1.2883360019706862e-05,
      "loss": 0.1491,
      "step": 7601
    },
    {
      "epoch": 2.807237813884786,
      "grad_norm": 0.31664612889289856,
      "learning_rate": 1.2858726444143368e-05,
      "loss": 0.1627,
      "step": 7602
    },
    {
      "epoch": 2.8076070901033976,
      "grad_norm": 0.23882780969142914,
      "learning_rate": 1.2834092868579876e-05,
      "loss": 0.1438,
      "step": 7603
    },
    {
      "epoch": 2.8079763663220088,
      "grad_norm": 0.34225931763648987,
      "learning_rate": 1.2809459293016382e-05,
      "loss": 0.2023,
      "step": 7604
    },
    {
      "epoch": 2.8083456425406204,
      "grad_norm": 0.23802785575389862,
      "learning_rate": 1.278482571745289e-05,
      "loss": 0.1465,
      "step": 7605
    },
    {
      "epoch": 2.808714918759232,
      "grad_norm": 0.26142677664756775,
      "learning_rate": 1.2760192141889396e-05,
      "loss": 0.1704,
      "step": 7606
    },
    {
      "epoch": 2.8090841949778436,
      "grad_norm": 0.32098034024238586,
      "learning_rate": 1.2735558566325904e-05,
      "loss": 0.1825,
      "step": 7607
    },
    {
      "epoch": 2.8094534711964547,
      "grad_norm": 0.2660562992095947,
      "learning_rate": 1.271092499076241e-05,
      "loss": 0.1822,
      "step": 7608
    },
    {
      "epoch": 2.8098227474150663,
      "grad_norm": 0.2180064469575882,
      "learning_rate": 1.2686291415198916e-05,
      "loss": 0.1456,
      "step": 7609
    },
    {
      "epoch": 2.810192023633678,
      "grad_norm": 0.33249959349632263,
      "learning_rate": 1.2661657839635424e-05,
      "loss": 0.1766,
      "step": 7610
    },
    {
      "epoch": 2.8105612998522895,
      "grad_norm": 0.28326165676116943,
      "learning_rate": 1.263702426407193e-05,
      "loss": 0.1694,
      "step": 7611
    },
    {
      "epoch": 2.810930576070901,
      "grad_norm": 0.27733269333839417,
      "learning_rate": 1.2612390688508438e-05,
      "loss": 0.161,
      "step": 7612
    },
    {
      "epoch": 2.8112998522895127,
      "grad_norm": 0.31507349014282227,
      "learning_rate": 1.2587757112944944e-05,
      "loss": 0.2159,
      "step": 7613
    },
    {
      "epoch": 2.8116691285081243,
      "grad_norm": 0.2528786361217499,
      "learning_rate": 1.2563123537381452e-05,
      "loss": 0.1788,
      "step": 7614
    },
    {
      "epoch": 2.8120384047267355,
      "grad_norm": 0.2537676990032196,
      "learning_rate": 1.2538489961817958e-05,
      "loss": 0.1526,
      "step": 7615
    },
    {
      "epoch": 2.812407680945347,
      "grad_norm": 0.2868138253688812,
      "learning_rate": 1.2513856386254466e-05,
      "loss": 0.1538,
      "step": 7616
    },
    {
      "epoch": 2.8127769571639587,
      "grad_norm": 0.29323476552963257,
      "learning_rate": 1.2489222810690972e-05,
      "loss": 0.1685,
      "step": 7617
    },
    {
      "epoch": 2.8131462333825703,
      "grad_norm": 0.3060005009174347,
      "learning_rate": 1.2464589235127479e-05,
      "loss": 0.2033,
      "step": 7618
    },
    {
      "epoch": 2.8135155096011815,
      "grad_norm": 0.23000088334083557,
      "learning_rate": 1.2439955659563986e-05,
      "loss": 0.136,
      "step": 7619
    },
    {
      "epoch": 2.813884785819793,
      "grad_norm": 0.27344846725463867,
      "learning_rate": 1.2415322084000493e-05,
      "loss": 0.1613,
      "step": 7620
    },
    {
      "epoch": 2.8142540620384047,
      "grad_norm": 0.23537056148052216,
      "learning_rate": 1.2390688508437e-05,
      "loss": 0.1402,
      "step": 7621
    },
    {
      "epoch": 2.8146233382570163,
      "grad_norm": 0.2935500741004944,
      "learning_rate": 1.2366054932873507e-05,
      "loss": 0.1817,
      "step": 7622
    },
    {
      "epoch": 2.814992614475628,
      "grad_norm": 0.23089289665222168,
      "learning_rate": 1.2341421357310014e-05,
      "loss": 0.1562,
      "step": 7623
    },
    {
      "epoch": 2.8153618906942395,
      "grad_norm": 0.3644351065158844,
      "learning_rate": 1.231678778174652e-05,
      "loss": 0.1977,
      "step": 7624
    },
    {
      "epoch": 2.815731166912851,
      "grad_norm": 0.24921384453773499,
      "learning_rate": 1.2292154206183028e-05,
      "loss": 0.1506,
      "step": 7625
    },
    {
      "epoch": 2.8161004431314622,
      "grad_norm": 0.2631310224533081,
      "learning_rate": 1.2267520630619535e-05,
      "loss": 0.1496,
      "step": 7626
    },
    {
      "epoch": 2.816469719350074,
      "grad_norm": 0.2318798452615738,
      "learning_rate": 1.2242887055056043e-05,
      "loss": 0.1553,
      "step": 7627
    },
    {
      "epoch": 2.8168389955686854,
      "grad_norm": 0.2602575123310089,
      "learning_rate": 1.2218253479492549e-05,
      "loss": 0.1398,
      "step": 7628
    },
    {
      "epoch": 2.817208271787297,
      "grad_norm": 0.24905669689178467,
      "learning_rate": 1.2193619903929057e-05,
      "loss": 0.1685,
      "step": 7629
    },
    {
      "epoch": 2.817577548005908,
      "grad_norm": 0.28452542424201965,
      "learning_rate": 1.2168986328365563e-05,
      "loss": 0.1547,
      "step": 7630
    },
    {
      "epoch": 2.81794682422452,
      "grad_norm": 0.24822600185871124,
      "learning_rate": 1.214435275280207e-05,
      "loss": 0.1566,
      "step": 7631
    },
    {
      "epoch": 2.8183161004431314,
      "grad_norm": 0.23183251917362213,
      "learning_rate": 1.2119719177238577e-05,
      "loss": 0.134,
      "step": 7632
    },
    {
      "epoch": 2.818685376661743,
      "grad_norm": 0.3169245421886444,
      "learning_rate": 1.2095085601675085e-05,
      "loss": 0.1871,
      "step": 7633
    },
    {
      "epoch": 2.8190546528803546,
      "grad_norm": 0.24495553970336914,
      "learning_rate": 1.207045202611159e-05,
      "loss": 0.175,
      "step": 7634
    },
    {
      "epoch": 2.819423929098966,
      "grad_norm": 0.25172358751296997,
      "learning_rate": 1.2045818450548099e-05,
      "loss": 0.153,
      "step": 7635
    },
    {
      "epoch": 2.819793205317578,
      "grad_norm": 0.30053114891052246,
      "learning_rate": 1.2021184874984605e-05,
      "loss": 0.1693,
      "step": 7636
    },
    {
      "epoch": 2.820162481536189,
      "grad_norm": 0.3276084065437317,
      "learning_rate": 1.1996551299421111e-05,
      "loss": 0.1903,
      "step": 7637
    },
    {
      "epoch": 2.8205317577548006,
      "grad_norm": 0.30875363945961,
      "learning_rate": 1.1971917723857619e-05,
      "loss": 0.1865,
      "step": 7638
    },
    {
      "epoch": 2.820901033973412,
      "grad_norm": 0.22612623870372772,
      "learning_rate": 1.1947284148294125e-05,
      "loss": 0.1506,
      "step": 7639
    },
    {
      "epoch": 2.821270310192024,
      "grad_norm": 0.3551541268825531,
      "learning_rate": 1.1922650572730633e-05,
      "loss": 0.1901,
      "step": 7640
    },
    {
      "epoch": 2.821639586410635,
      "grad_norm": 0.2712530493736267,
      "learning_rate": 1.1898016997167139e-05,
      "loss": 0.1415,
      "step": 7641
    },
    {
      "epoch": 2.8220088626292466,
      "grad_norm": 0.3051557242870331,
      "learning_rate": 1.1873383421603647e-05,
      "loss": 0.1637,
      "step": 7642
    },
    {
      "epoch": 2.822378138847858,
      "grad_norm": 0.26176202297210693,
      "learning_rate": 1.1848749846040153e-05,
      "loss": 0.1679,
      "step": 7643
    },
    {
      "epoch": 2.8227474150664698,
      "grad_norm": 0.3065352737903595,
      "learning_rate": 1.1824116270476661e-05,
      "loss": 0.1692,
      "step": 7644
    },
    {
      "epoch": 2.8231166912850814,
      "grad_norm": 0.2914688289165497,
      "learning_rate": 1.1799482694913167e-05,
      "loss": 0.1423,
      "step": 7645
    },
    {
      "epoch": 2.823485967503693,
      "grad_norm": 0.2894628643989563,
      "learning_rate": 1.1774849119349673e-05,
      "loss": 0.1845,
      "step": 7646
    },
    {
      "epoch": 2.823855243722304,
      "grad_norm": 0.276998907327652,
      "learning_rate": 1.1750215543786181e-05,
      "loss": 0.1652,
      "step": 7647
    },
    {
      "epoch": 2.8242245199409157,
      "grad_norm": 0.24740327894687653,
      "learning_rate": 1.1725581968222687e-05,
      "loss": 0.1609,
      "step": 7648
    },
    {
      "epoch": 2.8245937961595273,
      "grad_norm": 0.28068864345550537,
      "learning_rate": 1.1700948392659195e-05,
      "loss": 0.1745,
      "step": 7649
    },
    {
      "epoch": 2.824963072378139,
      "grad_norm": 0.24768218398094177,
      "learning_rate": 1.1676314817095701e-05,
      "loss": 0.1664,
      "step": 7650
    },
    {
      "epoch": 2.824963072378139,
      "eval_loss": 0.24496839940547943,
      "eval_runtime": 5.8515,
      "eval_samples_per_second": 8.545,
      "eval_steps_per_second": 1.196,
      "step": 7650
    },
    {
      "epoch": 2.8253323485967505,
      "grad_norm": 0.265293151140213,
      "learning_rate": 1.165168124153221e-05,
      "loss": 0.1646,
      "step": 7651
    },
    {
      "epoch": 2.8257016248153617,
      "grad_norm": 0.2964051365852356,
      "learning_rate": 1.1627047665968715e-05,
      "loss": 0.1589,
      "step": 7652
    },
    {
      "epoch": 2.8260709010339733,
      "grad_norm": 0.26073381304740906,
      "learning_rate": 1.1602414090405223e-05,
      "loss": 0.1537,
      "step": 7653
    },
    {
      "epoch": 2.826440177252585,
      "grad_norm": 0.2753491997718811,
      "learning_rate": 1.157778051484173e-05,
      "loss": 0.1553,
      "step": 7654
    },
    {
      "epoch": 2.8268094534711965,
      "grad_norm": 0.31869035959243774,
      "learning_rate": 1.1553146939278237e-05,
      "loss": 0.1874,
      "step": 7655
    },
    {
      "epoch": 2.827178729689808,
      "grad_norm": 0.2545974552631378,
      "learning_rate": 1.1528513363714743e-05,
      "loss": 0.1361,
      "step": 7656
    },
    {
      "epoch": 2.8275480059084197,
      "grad_norm": 0.224674791097641,
      "learning_rate": 1.1503879788151251e-05,
      "loss": 0.1322,
      "step": 7657
    },
    {
      "epoch": 2.827917282127031,
      "grad_norm": 0.3442631661891937,
      "learning_rate": 1.1479246212587757e-05,
      "loss": 0.1972,
      "step": 7658
    },
    {
      "epoch": 2.8282865583456425,
      "grad_norm": 0.306291401386261,
      "learning_rate": 1.1454612637024265e-05,
      "loss": 0.1617,
      "step": 7659
    },
    {
      "epoch": 2.828655834564254,
      "grad_norm": 0.2775784432888031,
      "learning_rate": 1.1429979061460772e-05,
      "loss": 0.1678,
      "step": 7660
    },
    {
      "epoch": 2.8290251107828657,
      "grad_norm": 0.304565966129303,
      "learning_rate": 1.140534548589728e-05,
      "loss": 0.1939,
      "step": 7661
    },
    {
      "epoch": 2.829394387001477,
      "grad_norm": 0.3083300292491913,
      "learning_rate": 1.1380711910333786e-05,
      "loss": 0.1578,
      "step": 7662
    },
    {
      "epoch": 2.8297636632200884,
      "grad_norm": 0.30755531787872314,
      "learning_rate": 1.1356078334770293e-05,
      "loss": 0.1722,
      "step": 7663
    },
    {
      "epoch": 2.8301329394387,
      "grad_norm": 0.31816384196281433,
      "learning_rate": 1.13314447592068e-05,
      "loss": 0.1657,
      "step": 7664
    },
    {
      "epoch": 2.8305022156573116,
      "grad_norm": 0.25910794734954834,
      "learning_rate": 1.1306811183643306e-05,
      "loss": 0.1611,
      "step": 7665
    },
    {
      "epoch": 2.8308714918759232,
      "grad_norm": 0.24290092289447784,
      "learning_rate": 1.1282177608079814e-05,
      "loss": 0.1554,
      "step": 7666
    },
    {
      "epoch": 2.831240768094535,
      "grad_norm": 0.2623692750930786,
      "learning_rate": 1.125754403251632e-05,
      "loss": 0.1542,
      "step": 7667
    },
    {
      "epoch": 2.8316100443131464,
      "grad_norm": 0.25557443499565125,
      "learning_rate": 1.1232910456952828e-05,
      "loss": 0.1371,
      "step": 7668
    },
    {
      "epoch": 2.8319793205317576,
      "grad_norm": 0.3583131432533264,
      "learning_rate": 1.1208276881389334e-05,
      "loss": 0.1724,
      "step": 7669
    },
    {
      "epoch": 2.832348596750369,
      "grad_norm": 0.3200792372226715,
      "learning_rate": 1.1183643305825842e-05,
      "loss": 0.1807,
      "step": 7670
    },
    {
      "epoch": 2.832717872968981,
      "grad_norm": 0.27036356925964355,
      "learning_rate": 1.1159009730262348e-05,
      "loss": 0.1542,
      "step": 7671
    },
    {
      "epoch": 2.8330871491875924,
      "grad_norm": 0.30485615134239197,
      "learning_rate": 1.1134376154698856e-05,
      "loss": 0.1657,
      "step": 7672
    },
    {
      "epoch": 2.8334564254062036,
      "grad_norm": 0.27574265003204346,
      "learning_rate": 1.1109742579135362e-05,
      "loss": 0.1862,
      "step": 7673
    },
    {
      "epoch": 2.833825701624815,
      "grad_norm": 0.23522606492042542,
      "learning_rate": 1.108510900357187e-05,
      "loss": 0.1312,
      "step": 7674
    },
    {
      "epoch": 2.8341949778434268,
      "grad_norm": 0.32631388306617737,
      "learning_rate": 1.1060475428008376e-05,
      "loss": 0.1852,
      "step": 7675
    },
    {
      "epoch": 2.8345642540620384,
      "grad_norm": 0.2653501629829407,
      "learning_rate": 1.1035841852444882e-05,
      "loss": 0.1735,
      "step": 7676
    },
    {
      "epoch": 2.83493353028065,
      "grad_norm": 0.297904372215271,
      "learning_rate": 1.101120827688139e-05,
      "loss": 0.1549,
      "step": 7677
    },
    {
      "epoch": 2.8353028064992616,
      "grad_norm": 0.2230168879032135,
      "learning_rate": 1.0986574701317896e-05,
      "loss": 0.1477,
      "step": 7678
    },
    {
      "epoch": 2.835672082717873,
      "grad_norm": 0.3190532624721527,
      "learning_rate": 1.0961941125754404e-05,
      "loss": 0.1696,
      "step": 7679
    },
    {
      "epoch": 2.8360413589364843,
      "grad_norm": 0.2887504994869232,
      "learning_rate": 1.093730755019091e-05,
      "loss": 0.1545,
      "step": 7680
    },
    {
      "epoch": 2.836410635155096,
      "grad_norm": 0.31410741806030273,
      "learning_rate": 1.0912673974627418e-05,
      "loss": 0.1713,
      "step": 7681
    },
    {
      "epoch": 2.8367799113737076,
      "grad_norm": 0.36739683151245117,
      "learning_rate": 1.0888040399063924e-05,
      "loss": 0.1798,
      "step": 7682
    },
    {
      "epoch": 2.837149187592319,
      "grad_norm": 0.2992699146270752,
      "learning_rate": 1.0863406823500432e-05,
      "loss": 0.1807,
      "step": 7683
    },
    {
      "epoch": 2.8375184638109303,
      "grad_norm": 0.33379828929901123,
      "learning_rate": 1.0838773247936938e-05,
      "loss": 0.1869,
      "step": 7684
    },
    {
      "epoch": 2.837887740029542,
      "grad_norm": 0.31527650356292725,
      "learning_rate": 1.0814139672373446e-05,
      "loss": 0.1814,
      "step": 7685
    },
    {
      "epoch": 2.8382570162481535,
      "grad_norm": 0.27176427841186523,
      "learning_rate": 1.0789506096809952e-05,
      "loss": 0.1584,
      "step": 7686
    },
    {
      "epoch": 2.838626292466765,
      "grad_norm": 0.2806834876537323,
      "learning_rate": 1.076487252124646e-05,
      "loss": 0.1611,
      "step": 7687
    },
    {
      "epoch": 2.8389955686853767,
      "grad_norm": 0.20951002836227417,
      "learning_rate": 1.0740238945682966e-05,
      "loss": 0.1532,
      "step": 7688
    },
    {
      "epoch": 2.8393648449039883,
      "grad_norm": 0.25673583149909973,
      "learning_rate": 1.0715605370119474e-05,
      "loss": 0.1572,
      "step": 7689
    },
    {
      "epoch": 2.8397341211226,
      "grad_norm": 0.29751238226890564,
      "learning_rate": 1.069097179455598e-05,
      "loss": 0.1856,
      "step": 7690
    },
    {
      "epoch": 2.840103397341211,
      "grad_norm": 0.23384396731853485,
      "learning_rate": 1.0666338218992488e-05,
      "loss": 0.1369,
      "step": 7691
    },
    {
      "epoch": 2.8404726735598227,
      "grad_norm": 0.2613724172115326,
      "learning_rate": 1.0641704643428994e-05,
      "loss": 0.1743,
      "step": 7692
    },
    {
      "epoch": 2.8408419497784343,
      "grad_norm": 0.3152853548526764,
      "learning_rate": 1.0617071067865502e-05,
      "loss": 0.172,
      "step": 7693
    },
    {
      "epoch": 2.841211225997046,
      "grad_norm": 0.3350229859352112,
      "learning_rate": 1.0592437492302008e-05,
      "loss": 0.1894,
      "step": 7694
    },
    {
      "epoch": 2.841580502215657,
      "grad_norm": 0.29419779777526855,
      "learning_rate": 1.0567803916738515e-05,
      "loss": 0.1826,
      "step": 7695
    },
    {
      "epoch": 2.8419497784342687,
      "grad_norm": 0.2597488462924957,
      "learning_rate": 1.0543170341175022e-05,
      "loss": 0.1489,
      "step": 7696
    },
    {
      "epoch": 2.8423190546528803,
      "grad_norm": 0.3090327978134155,
      "learning_rate": 1.0518536765611529e-05,
      "loss": 0.1644,
      "step": 7697
    },
    {
      "epoch": 2.842688330871492,
      "grad_norm": 0.29547038674354553,
      "learning_rate": 1.0493903190048036e-05,
      "loss": 0.145,
      "step": 7698
    },
    {
      "epoch": 2.8430576070901035,
      "grad_norm": 0.3017313778400421,
      "learning_rate": 1.0469269614484543e-05,
      "loss": 0.2036,
      "step": 7699
    },
    {
      "epoch": 2.843426883308715,
      "grad_norm": 0.3118973970413208,
      "learning_rate": 1.044463603892105e-05,
      "loss": 0.1856,
      "step": 7700
    },
    {
      "epoch": 2.843426883308715,
      "eval_loss": 0.2449484020471573,
      "eval_runtime": 5.8622,
      "eval_samples_per_second": 8.529,
      "eval_steps_per_second": 1.194,
      "step": 7700
    },
    {
      "epoch": 2.8437961595273267,
      "grad_norm": 0.28383857011795044,
      "learning_rate": 1.0420002463357557e-05,
      "loss": 0.1486,
      "step": 7701
    },
    {
      "epoch": 2.844165435745938,
      "grad_norm": 0.31991198658943176,
      "learning_rate": 1.0395368887794065e-05,
      "loss": 0.19,
      "step": 7702
    },
    {
      "epoch": 2.8445347119645494,
      "grad_norm": 0.24715940654277802,
      "learning_rate": 1.037073531223057e-05,
      "loss": 0.1355,
      "step": 7703
    },
    {
      "epoch": 2.844903988183161,
      "grad_norm": 0.2981330156326294,
      "learning_rate": 1.0346101736667077e-05,
      "loss": 0.1694,
      "step": 7704
    },
    {
      "epoch": 2.8452732644017726,
      "grad_norm": 0.23327438533306122,
      "learning_rate": 1.0321468161103585e-05,
      "loss": 0.1678,
      "step": 7705
    },
    {
      "epoch": 2.845642540620384,
      "grad_norm": 0.26901179552078247,
      "learning_rate": 1.0296834585540091e-05,
      "loss": 0.168,
      "step": 7706
    },
    {
      "epoch": 2.8460118168389954,
      "grad_norm": 0.34340429306030273,
      "learning_rate": 1.0272201009976599e-05,
      "loss": 0.168,
      "step": 7707
    },
    {
      "epoch": 2.846381093057607,
      "grad_norm": 0.22643637657165527,
      "learning_rate": 1.0247567434413105e-05,
      "loss": 0.1397,
      "step": 7708
    },
    {
      "epoch": 2.8467503692762186,
      "grad_norm": 0.28315696120262146,
      "learning_rate": 1.0222933858849613e-05,
      "loss": 0.172,
      "step": 7709
    },
    {
      "epoch": 2.84711964549483,
      "grad_norm": 0.3400122821331024,
      "learning_rate": 1.0198300283286119e-05,
      "loss": 0.1544,
      "step": 7710
    },
    {
      "epoch": 2.847488921713442,
      "grad_norm": 0.3144240081310272,
      "learning_rate": 1.0173666707722627e-05,
      "loss": 0.1875,
      "step": 7711
    },
    {
      "epoch": 2.8478581979320534,
      "grad_norm": 0.25010183453559875,
      "learning_rate": 1.0149033132159133e-05,
      "loss": 0.1324,
      "step": 7712
    },
    {
      "epoch": 2.8482274741506646,
      "grad_norm": 0.37234553694725037,
      "learning_rate": 1.0124399556595641e-05,
      "loss": 0.2089,
      "step": 7713
    },
    {
      "epoch": 2.848596750369276,
      "grad_norm": 0.2791849970817566,
      "learning_rate": 1.0099765981032147e-05,
      "loss": 0.1557,
      "step": 7714
    },
    {
      "epoch": 2.848966026587888,
      "grad_norm": 0.2452075034379959,
      "learning_rate": 1.0075132405468655e-05,
      "loss": 0.1613,
      "step": 7715
    },
    {
      "epoch": 2.8493353028064994,
      "grad_norm": 0.28953418135643005,
      "learning_rate": 1.0050498829905161e-05,
      "loss": 0.1569,
      "step": 7716
    },
    {
      "epoch": 2.8497045790251105,
      "grad_norm": 0.30108311772346497,
      "learning_rate": 1.0025865254341669e-05,
      "loss": 0.1732,
      "step": 7717
    },
    {
      "epoch": 2.850073855243722,
      "grad_norm": 0.2359277755022049,
      "learning_rate": 1.0001231678778175e-05,
      "loss": 0.1729,
      "step": 7718
    },
    {
      "epoch": 2.8504431314623337,
      "grad_norm": 0.26641425490379333,
      "learning_rate": 9.976598103214683e-06,
      "loss": 0.1648,
      "step": 7719
    },
    {
      "epoch": 2.8508124076809453,
      "grad_norm": 0.27465370297431946,
      "learning_rate": 9.951964527651189e-06,
      "loss": 0.1641,
      "step": 7720
    },
    {
      "epoch": 2.851181683899557,
      "grad_norm": 0.24329142272472382,
      "learning_rate": 9.927330952087697e-06,
      "loss": 0.1469,
      "step": 7721
    },
    {
      "epoch": 2.8515509601181686,
      "grad_norm": 0.31522512435913086,
      "learning_rate": 9.902697376524203e-06,
      "loss": 0.1745,
      "step": 7722
    },
    {
      "epoch": 2.85192023633678,
      "grad_norm": 0.21894076466560364,
      "learning_rate": 9.87806380096071e-06,
      "loss": 0.1442,
      "step": 7723
    },
    {
      "epoch": 2.8522895125553913,
      "grad_norm": 0.30204495787620544,
      "learning_rate": 9.853430225397217e-06,
      "loss": 0.1674,
      "step": 7724
    },
    {
      "epoch": 2.852658788774003,
      "grad_norm": 0.27711284160614014,
      "learning_rate": 9.828796649833723e-06,
      "loss": 0.1433,
      "step": 7725
    },
    {
      "epoch": 2.8530280649926145,
      "grad_norm": 0.22089818120002747,
      "learning_rate": 9.804163074270231e-06,
      "loss": 0.1426,
      "step": 7726
    },
    {
      "epoch": 2.853397341211226,
      "grad_norm": 0.26761361956596375,
      "learning_rate": 9.779529498706737e-06,
      "loss": 0.1617,
      "step": 7727
    },
    {
      "epoch": 2.8537666174298373,
      "grad_norm": 0.37248560786247253,
      "learning_rate": 9.754895923143245e-06,
      "loss": 0.1982,
      "step": 7728
    },
    {
      "epoch": 2.854135893648449,
      "grad_norm": 0.23078663647174835,
      "learning_rate": 9.730262347579751e-06,
      "loss": 0.149,
      "step": 7729
    },
    {
      "epoch": 2.8545051698670605,
      "grad_norm": 0.3424321711063385,
      "learning_rate": 9.70562877201626e-06,
      "loss": 0.1956,
      "step": 7730
    },
    {
      "epoch": 2.854874446085672,
      "grad_norm": 0.25236353278160095,
      "learning_rate": 9.680995196452765e-06,
      "loss": 0.1605,
      "step": 7731
    },
    {
      "epoch": 2.8552437223042837,
      "grad_norm": 0.26007863879203796,
      "learning_rate": 9.656361620889273e-06,
      "loss": 0.1469,
      "step": 7732
    },
    {
      "epoch": 2.8556129985228953,
      "grad_norm": 0.2958352267742157,
      "learning_rate": 9.63172804532578e-06,
      "loss": 0.1836,
      "step": 7733
    },
    {
      "epoch": 2.855982274741507,
      "grad_norm": 0.2829585671424866,
      "learning_rate": 9.607094469762286e-06,
      "loss": 0.1508,
      "step": 7734
    },
    {
      "epoch": 2.856351550960118,
      "grad_norm": 0.2982698082923889,
      "learning_rate": 9.582460894198794e-06,
      "loss": 0.1783,
      "step": 7735
    },
    {
      "epoch": 2.8567208271787297,
      "grad_norm": 0.27404478192329407,
      "learning_rate": 9.5578273186353e-06,
      "loss": 0.1642,
      "step": 7736
    },
    {
      "epoch": 2.8570901033973413,
      "grad_norm": 0.23169001936912537,
      "learning_rate": 9.533193743071808e-06,
      "loss": 0.1622,
      "step": 7737
    },
    {
      "epoch": 2.857459379615953,
      "grad_norm": 0.2677990198135376,
      "learning_rate": 9.508560167508314e-06,
      "loss": 0.1703,
      "step": 7738
    },
    {
      "epoch": 2.857828655834564,
      "grad_norm": 0.33130165934562683,
      "learning_rate": 9.483926591944822e-06,
      "loss": 0.1868,
      "step": 7739
    },
    {
      "epoch": 2.8581979320531756,
      "grad_norm": 0.23286062479019165,
      "learning_rate": 9.459293016381328e-06,
      "loss": 0.1857,
      "step": 7740
    },
    {
      "epoch": 2.8585672082717872,
      "grad_norm": 0.25445547699928284,
      "learning_rate": 9.434659440817836e-06,
      "loss": 0.1432,
      "step": 7741
    },
    {
      "epoch": 2.858936484490399,
      "grad_norm": 0.4026498794555664,
      "learning_rate": 9.410025865254342e-06,
      "loss": 0.1975,
      "step": 7742
    },
    {
      "epoch": 2.8593057607090104,
      "grad_norm": 0.32120877504348755,
      "learning_rate": 9.38539228969085e-06,
      "loss": 0.1682,
      "step": 7743
    },
    {
      "epoch": 2.859675036927622,
      "grad_norm": 0.2434127777814865,
      "learning_rate": 9.360758714127356e-06,
      "loss": 0.1505,
      "step": 7744
    },
    {
      "epoch": 2.8600443131462336,
      "grad_norm": 0.2673363983631134,
      "learning_rate": 9.336125138563864e-06,
      "loss": 0.1641,
      "step": 7745
    },
    {
      "epoch": 2.860413589364845,
      "grad_norm": 0.2814323902130127,
      "learning_rate": 9.31149156300037e-06,
      "loss": 0.1733,
      "step": 7746
    },
    {
      "epoch": 2.8607828655834564,
      "grad_norm": 0.252278208732605,
      "learning_rate": 9.286857987436878e-06,
      "loss": 0.173,
      "step": 7747
    },
    {
      "epoch": 2.861152141802068,
      "grad_norm": 0.27015420794487,
      "learning_rate": 9.262224411873384e-06,
      "loss": 0.1658,
      "step": 7748
    },
    {
      "epoch": 2.8615214180206796,
      "grad_norm": 0.27484989166259766,
      "learning_rate": 9.237590836309892e-06,
      "loss": 0.164,
      "step": 7749
    },
    {
      "epoch": 2.8618906942392908,
      "grad_norm": 0.2758505046367645,
      "learning_rate": 9.212957260746398e-06,
      "loss": 0.1608,
      "step": 7750
    },
    {
      "epoch": 2.8618906942392908,
      "eval_loss": 0.24487631022930145,
      "eval_runtime": 5.8611,
      "eval_samples_per_second": 8.531,
      "eval_steps_per_second": 1.194,
      "step": 7750
    },
    {
      "epoch": 2.8622599704579024,
      "grad_norm": 0.30068379640579224,
      "learning_rate": 9.188323685182904e-06,
      "loss": 0.1789,
      "step": 7751
    },
    {
      "epoch": 2.862629246676514,
      "grad_norm": 0.28493115305900574,
      "learning_rate": 9.163690109619412e-06,
      "loss": 0.1527,
      "step": 7752
    },
    {
      "epoch": 2.8629985228951256,
      "grad_norm": 0.23973438143730164,
      "learning_rate": 9.139056534055918e-06,
      "loss": 0.1501,
      "step": 7753
    },
    {
      "epoch": 2.863367799113737,
      "grad_norm": 0.29730549454689026,
      "learning_rate": 9.114422958492426e-06,
      "loss": 0.1762,
      "step": 7754
    },
    {
      "epoch": 2.863737075332349,
      "grad_norm": 0.32645919919013977,
      "learning_rate": 9.089789382928932e-06,
      "loss": 0.1873,
      "step": 7755
    },
    {
      "epoch": 2.8641063515509604,
      "grad_norm": 0.26671698689460754,
      "learning_rate": 9.06515580736544e-06,
      "loss": 0.1432,
      "step": 7756
    },
    {
      "epoch": 2.8644756277695715,
      "grad_norm": 0.27620962262153625,
      "learning_rate": 9.040522231801946e-06,
      "loss": 0.1566,
      "step": 7757
    },
    {
      "epoch": 2.864844903988183,
      "grad_norm": 0.30255046486854553,
      "learning_rate": 9.015888656238454e-06,
      "loss": 0.1774,
      "step": 7758
    },
    {
      "epoch": 2.8652141802067947,
      "grad_norm": 0.2471076399087906,
      "learning_rate": 8.99125508067496e-06,
      "loss": 0.1641,
      "step": 7759
    },
    {
      "epoch": 2.8655834564254064,
      "grad_norm": 0.26162055134773254,
      "learning_rate": 8.966621505111468e-06,
      "loss": 0.1522,
      "step": 7760
    },
    {
      "epoch": 2.8659527326440175,
      "grad_norm": 0.27141493558883667,
      "learning_rate": 8.941987929547974e-06,
      "loss": 0.1752,
      "step": 7761
    },
    {
      "epoch": 2.866322008862629,
      "grad_norm": 0.2501200735569,
      "learning_rate": 8.91735435398448e-06,
      "loss": 0.1583,
      "step": 7762
    },
    {
      "epoch": 2.8666912850812407,
      "grad_norm": 0.24008040130138397,
      "learning_rate": 8.892720778420988e-06,
      "loss": 0.1421,
      "step": 7763
    },
    {
      "epoch": 2.8670605612998523,
      "grad_norm": 0.27715250849723816,
      "learning_rate": 8.868087202857494e-06,
      "loss": 0.1786,
      "step": 7764
    },
    {
      "epoch": 2.867429837518464,
      "grad_norm": 0.32403644919395447,
      "learning_rate": 8.843453627294002e-06,
      "loss": 0.1633,
      "step": 7765
    },
    {
      "epoch": 2.8677991137370755,
      "grad_norm": 0.3056527078151703,
      "learning_rate": 8.818820051730508e-06,
      "loss": 0.1882,
      "step": 7766
    },
    {
      "epoch": 2.868168389955687,
      "grad_norm": 0.28270432353019714,
      "learning_rate": 8.794186476167016e-06,
      "loss": 0.1688,
      "step": 7767
    },
    {
      "epoch": 2.8685376661742983,
      "grad_norm": 0.25626882910728455,
      "learning_rate": 8.769552900603523e-06,
      "loss": 0.1562,
      "step": 7768
    },
    {
      "epoch": 2.86890694239291,
      "grad_norm": 0.2904073894023895,
      "learning_rate": 8.74491932504003e-06,
      "loss": 0.1684,
      "step": 7769
    },
    {
      "epoch": 2.8692762186115215,
      "grad_norm": 0.3247445821762085,
      "learning_rate": 8.720285749476537e-06,
      "loss": 0.1763,
      "step": 7770
    },
    {
      "epoch": 2.869645494830133,
      "grad_norm": 0.28118449449539185,
      "learning_rate": 8.695652173913044e-06,
      "loss": 0.1753,
      "step": 7771
    },
    {
      "epoch": 2.8700147710487443,
      "grad_norm": 0.2690677046775818,
      "learning_rate": 8.67101859834955e-06,
      "loss": 0.1362,
      "step": 7772
    },
    {
      "epoch": 2.870384047267356,
      "grad_norm": 0.2865660786628723,
      "learning_rate": 8.646385022786058e-06,
      "loss": 0.1805,
      "step": 7773
    },
    {
      "epoch": 2.8707533234859675,
      "grad_norm": 0.23917242884635925,
      "learning_rate": 8.621751447222565e-06,
      "loss": 0.1732,
      "step": 7774
    },
    {
      "epoch": 2.871122599704579,
      "grad_norm": 0.23305903375148773,
      "learning_rate": 8.597117871659072e-06,
      "loss": 0.1614,
      "step": 7775
    },
    {
      "epoch": 2.8714918759231907,
      "grad_norm": 0.25651511549949646,
      "learning_rate": 8.572484296095579e-06,
      "loss": 0.1778,
      "step": 7776
    },
    {
      "epoch": 2.8718611521418023,
      "grad_norm": 0.252269983291626,
      "learning_rate": 8.547850720532087e-06,
      "loss": 0.1585,
      "step": 7777
    },
    {
      "epoch": 2.8722304283604134,
      "grad_norm": 0.2591896057128906,
      "learning_rate": 8.523217144968593e-06,
      "loss": 0.1766,
      "step": 7778
    },
    {
      "epoch": 2.872599704579025,
      "grad_norm": 0.26428085565567017,
      "learning_rate": 8.4985835694051e-06,
      "loss": 0.1432,
      "step": 7779
    },
    {
      "epoch": 2.8729689807976366,
      "grad_norm": 0.2558910846710205,
      "learning_rate": 8.473949993841607e-06,
      "loss": 0.1451,
      "step": 7780
    },
    {
      "epoch": 2.8733382570162482,
      "grad_norm": 0.37068071961402893,
      "learning_rate": 8.449316418278113e-06,
      "loss": 0.2287,
      "step": 7781
    },
    {
      "epoch": 2.87370753323486,
      "grad_norm": 0.21412429213523865,
      "learning_rate": 8.42468284271462e-06,
      "loss": 0.1313,
      "step": 7782
    },
    {
      "epoch": 2.874076809453471,
      "grad_norm": 0.278902530670166,
      "learning_rate": 8.400049267151127e-06,
      "loss": 0.1754,
      "step": 7783
    },
    {
      "epoch": 2.8744460856720826,
      "grad_norm": 0.27167651057243347,
      "learning_rate": 8.375415691587635e-06,
      "loss": 0.1608,
      "step": 7784
    },
    {
      "epoch": 2.874815361890694,
      "grad_norm": 0.236875981092453,
      "learning_rate": 8.350782116024141e-06,
      "loss": 0.1775,
      "step": 7785
    },
    {
      "epoch": 2.875184638109306,
      "grad_norm": 0.23661479353904724,
      "learning_rate": 8.326148540460649e-06,
      "loss": 0.1473,
      "step": 7786
    },
    {
      "epoch": 2.8755539143279174,
      "grad_norm": 0.27548691630363464,
      "learning_rate": 8.301514964897155e-06,
      "loss": 0.1524,
      "step": 7787
    },
    {
      "epoch": 2.875923190546529,
      "grad_norm": 0.19834963977336884,
      "learning_rate": 8.276881389333663e-06,
      "loss": 0.141,
      "step": 7788
    },
    {
      "epoch": 2.87629246676514,
      "grad_norm": 0.23704633116722107,
      "learning_rate": 8.252247813770169e-06,
      "loss": 0.1583,
      "step": 7789
    },
    {
      "epoch": 2.8766617429837518,
      "grad_norm": 0.2559584677219391,
      "learning_rate": 8.227614238206677e-06,
      "loss": 0.1426,
      "step": 7790
    },
    {
      "epoch": 2.8770310192023634,
      "grad_norm": 0.35006478428840637,
      "learning_rate": 8.202980662643183e-06,
      "loss": 0.2012,
      "step": 7791
    },
    {
      "epoch": 2.877400295420975,
      "grad_norm": 0.27662691473960876,
      "learning_rate": 8.17834708707969e-06,
      "loss": 0.1644,
      "step": 7792
    },
    {
      "epoch": 2.8777695716395866,
      "grad_norm": 0.2628805637359619,
      "learning_rate": 8.153713511516197e-06,
      "loss": 0.1658,
      "step": 7793
    },
    {
      "epoch": 2.8781388478581977,
      "grad_norm": 0.24730327725410461,
      "learning_rate": 8.129079935952703e-06,
      "loss": 0.1697,
      "step": 7794
    },
    {
      "epoch": 2.8785081240768093,
      "grad_norm": 0.2437671720981598,
      "learning_rate": 8.104446360389211e-06,
      "loss": 0.1514,
      "step": 7795
    },
    {
      "epoch": 2.878877400295421,
      "grad_norm": 0.2743469774723053,
      "learning_rate": 8.079812784825717e-06,
      "loss": 0.1659,
      "step": 7796
    },
    {
      "epoch": 2.8792466765140325,
      "grad_norm": 0.29006099700927734,
      "learning_rate": 8.055179209262225e-06,
      "loss": 0.1617,
      "step": 7797
    },
    {
      "epoch": 2.879615952732644,
      "grad_norm": 0.2561509311199188,
      "learning_rate": 8.030545633698731e-06,
      "loss": 0.1404,
      "step": 7798
    },
    {
      "epoch": 2.8799852289512557,
      "grad_norm": 0.269986093044281,
      "learning_rate": 8.00591205813524e-06,
      "loss": 0.1521,
      "step": 7799
    },
    {
      "epoch": 2.880354505169867,
      "grad_norm": 0.23220254480838776,
      "learning_rate": 7.981278482571745e-06,
      "loss": 0.1503,
      "step": 7800
    },
    {
      "epoch": 2.880354505169867,
      "eval_loss": 0.24527710676193237,
      "eval_runtime": 5.8643,
      "eval_samples_per_second": 8.526,
      "eval_steps_per_second": 1.194,
      "step": 7800
    },
    {
      "epoch": 2.8807237813884785,
      "grad_norm": 0.2728011906147003,
      "learning_rate": 7.956644907008253e-06,
      "loss": 0.1579,
      "step": 7801
    },
    {
      "epoch": 2.88109305760709,
      "grad_norm": 0.24128302931785583,
      "learning_rate": 7.93201133144476e-06,
      "loss": 0.127,
      "step": 7802
    },
    {
      "epoch": 2.8814623338257017,
      "grad_norm": 0.2598339319229126,
      "learning_rate": 7.907377755881267e-06,
      "loss": 0.1582,
      "step": 7803
    },
    {
      "epoch": 2.881831610044313,
      "grad_norm": 0.30559489130973816,
      "learning_rate": 7.882744180317773e-06,
      "loss": 0.1755,
      "step": 7804
    },
    {
      "epoch": 2.8822008862629245,
      "grad_norm": 0.25795549154281616,
      "learning_rate": 7.858110604754281e-06,
      "loss": 0.173,
      "step": 7805
    },
    {
      "epoch": 2.882570162481536,
      "grad_norm": 0.30176594853401184,
      "learning_rate": 7.833477029190787e-06,
      "loss": 0.1725,
      "step": 7806
    },
    {
      "epoch": 2.8829394387001477,
      "grad_norm": 0.2542456388473511,
      "learning_rate": 7.808843453627295e-06,
      "loss": 0.1563,
      "step": 7807
    },
    {
      "epoch": 2.8833087149187593,
      "grad_norm": 0.26239079236984253,
      "learning_rate": 7.784209878063801e-06,
      "loss": 0.1751,
      "step": 7808
    },
    {
      "epoch": 2.883677991137371,
      "grad_norm": 0.25571733713150024,
      "learning_rate": 7.759576302500308e-06,
      "loss": 0.1665,
      "step": 7809
    },
    {
      "epoch": 2.8840472673559825,
      "grad_norm": 0.24270211160182953,
      "learning_rate": 7.734942726936816e-06,
      "loss": 0.173,
      "step": 7810
    },
    {
      "epoch": 2.8844165435745936,
      "grad_norm": 0.20862916111946106,
      "learning_rate": 7.710309151373322e-06,
      "loss": 0.1416,
      "step": 7811
    },
    {
      "epoch": 2.8847858197932053,
      "grad_norm": 0.2596467137336731,
      "learning_rate": 7.68567557580983e-06,
      "loss": 0.1754,
      "step": 7812
    },
    {
      "epoch": 2.885155096011817,
      "grad_norm": 0.3205491602420807,
      "learning_rate": 7.661042000246336e-06,
      "loss": 0.1826,
      "step": 7813
    },
    {
      "epoch": 2.8855243722304285,
      "grad_norm": 0.25602033734321594,
      "learning_rate": 7.636408424682844e-06,
      "loss": 0.1502,
      "step": 7814
    },
    {
      "epoch": 2.8858936484490396,
      "grad_norm": 0.261232852935791,
      "learning_rate": 7.611774849119351e-06,
      "loss": 0.1537,
      "step": 7815
    },
    {
      "epoch": 2.886262924667651,
      "grad_norm": 0.27913686633110046,
      "learning_rate": 7.587141273555858e-06,
      "loss": 0.1434,
      "step": 7816
    },
    {
      "epoch": 2.886632200886263,
      "grad_norm": 0.25788408517837524,
      "learning_rate": 7.562507697992365e-06,
      "loss": 0.1756,
      "step": 7817
    },
    {
      "epoch": 2.8870014771048744,
      "grad_norm": 0.2901060879230499,
      "learning_rate": 7.537874122428872e-06,
      "loss": 0.1842,
      "step": 7818
    },
    {
      "epoch": 2.887370753323486,
      "grad_norm": 0.2752688527107239,
      "learning_rate": 7.513240546865379e-06,
      "loss": 0.1801,
      "step": 7819
    },
    {
      "epoch": 2.8877400295420976,
      "grad_norm": 0.26344338059425354,
      "learning_rate": 7.488606971301884e-06,
      "loss": 0.1793,
      "step": 7820
    },
    {
      "epoch": 2.8881093057607092,
      "grad_norm": 0.2962002754211426,
      "learning_rate": 7.463973395738391e-06,
      "loss": 0.1896,
      "step": 7821
    },
    {
      "epoch": 2.8884785819793204,
      "grad_norm": 0.27300742268562317,
      "learning_rate": 7.439339820174898e-06,
      "loss": 0.1587,
      "step": 7822
    },
    {
      "epoch": 2.888847858197932,
      "grad_norm": 0.25111380219459534,
      "learning_rate": 7.414706244611405e-06,
      "loss": 0.1764,
      "step": 7823
    },
    {
      "epoch": 2.8892171344165436,
      "grad_norm": 0.2565227746963501,
      "learning_rate": 7.390072669047912e-06,
      "loss": 0.1544,
      "step": 7824
    },
    {
      "epoch": 2.889586410635155,
      "grad_norm": 0.27793753147125244,
      "learning_rate": 7.365439093484419e-06,
      "loss": 0.1584,
      "step": 7825
    },
    {
      "epoch": 2.8899556868537664,
      "grad_norm": 0.2671755850315094,
      "learning_rate": 7.340805517920926e-06,
      "loss": 0.1458,
      "step": 7826
    },
    {
      "epoch": 2.890324963072378,
      "grad_norm": 0.26163250207901,
      "learning_rate": 7.316171942357433e-06,
      "loss": 0.1607,
      "step": 7827
    },
    {
      "epoch": 2.8906942392909896,
      "grad_norm": 0.29872065782546997,
      "learning_rate": 7.29153836679394e-06,
      "loss": 0.1563,
      "step": 7828
    },
    {
      "epoch": 2.891063515509601,
      "grad_norm": 0.24086235463619232,
      "learning_rate": 7.266904791230447e-06,
      "loss": 0.1411,
      "step": 7829
    },
    {
      "epoch": 2.8914327917282128,
      "grad_norm": 0.2317669689655304,
      "learning_rate": 7.242271215666954e-06,
      "loss": 0.1401,
      "step": 7830
    },
    {
      "epoch": 2.8918020679468244,
      "grad_norm": 0.24406473338603973,
      "learning_rate": 7.217637640103461e-06,
      "loss": 0.1528,
      "step": 7831
    },
    {
      "epoch": 2.892171344165436,
      "grad_norm": 0.24135088920593262,
      "learning_rate": 7.193004064539968e-06,
      "loss": 0.1949,
      "step": 7832
    },
    {
      "epoch": 2.892540620384047,
      "grad_norm": 0.305979460477829,
      "learning_rate": 7.168370488976475e-06,
      "loss": 0.1796,
      "step": 7833
    },
    {
      "epoch": 2.8929098966026587,
      "grad_norm": 0.2573777437210083,
      "learning_rate": 7.143736913412982e-06,
      "loss": 0.1656,
      "step": 7834
    },
    {
      "epoch": 2.8932791728212703,
      "grad_norm": 0.2556525766849518,
      "learning_rate": 7.119103337849489e-06,
      "loss": 0.1697,
      "step": 7835
    },
    {
      "epoch": 2.893648449039882,
      "grad_norm": 0.2672213613986969,
      "learning_rate": 7.094469762285996e-06,
      "loss": 0.1663,
      "step": 7836
    },
    {
      "epoch": 2.894017725258493,
      "grad_norm": 0.2884562015533447,
      "learning_rate": 7.069836186722503e-06,
      "loss": 0.1838,
      "step": 7837
    },
    {
      "epoch": 2.8943870014771047,
      "grad_norm": 0.27062246203422546,
      "learning_rate": 7.04520261115901e-06,
      "loss": 0.1629,
      "step": 7838
    },
    {
      "epoch": 2.8947562776957163,
      "grad_norm": 0.2844317853450775,
      "learning_rate": 7.020569035595517e-06,
      "loss": 0.1759,
      "step": 7839
    },
    {
      "epoch": 2.895125553914328,
      "grad_norm": 0.2508607506752014,
      "learning_rate": 6.995935460032024e-06,
      "loss": 0.167,
      "step": 7840
    },
    {
      "epoch": 2.8954948301329395,
      "grad_norm": 0.23912623524665833,
      "learning_rate": 6.971301884468531e-06,
      "loss": 0.1729,
      "step": 7841
    },
    {
      "epoch": 2.895864106351551,
      "grad_norm": 0.24652552604675293,
      "learning_rate": 6.946668308905038e-06,
      "loss": 0.157,
      "step": 7842
    },
    {
      "epoch": 2.8962333825701627,
      "grad_norm": 0.3296917676925659,
      "learning_rate": 6.922034733341545e-06,
      "loss": 0.172,
      "step": 7843
    },
    {
      "epoch": 2.896602658788774,
      "grad_norm": 0.2623459994792938,
      "learning_rate": 6.897401157778052e-06,
      "loss": 0.165,
      "step": 7844
    },
    {
      "epoch": 2.8969719350073855,
      "grad_norm": 0.19621816277503967,
      "learning_rate": 6.872767582214559e-06,
      "loss": 0.1286,
      "step": 7845
    },
    {
      "epoch": 2.897341211225997,
      "grad_norm": 0.31013402342796326,
      "learning_rate": 6.848134006651066e-06,
      "loss": 0.164,
      "step": 7846
    },
    {
      "epoch": 2.8977104874446087,
      "grad_norm": 0.2559880018234253,
      "learning_rate": 6.823500431087573e-06,
      "loss": 0.1622,
      "step": 7847
    },
    {
      "epoch": 2.89807976366322,
      "grad_norm": 0.31077656149864197,
      "learning_rate": 6.7988668555240804e-06,
      "loss": 0.1863,
      "step": 7848
    },
    {
      "epoch": 2.8984490398818314,
      "grad_norm": 0.29186683893203735,
      "learning_rate": 6.774233279960586e-06,
      "loss": 0.1564,
      "step": 7849
    },
    {
      "epoch": 2.898818316100443,
      "grad_norm": 0.35916706919670105,
      "learning_rate": 6.749599704397093e-06,
      "loss": 0.1862,
      "step": 7850
    },
    {
      "epoch": 2.898818316100443,
      "eval_loss": 0.2451774775981903,
      "eval_runtime": 5.8467,
      "eval_samples_per_second": 8.552,
      "eval_steps_per_second": 1.197,
      "step": 7850
    },
    {
      "epoch": 2.8991875923190547,
      "grad_norm": 0.3264926075935364,
      "learning_rate": 6.7249661288336e-06,
      "loss": 0.1918,
      "step": 7851
    },
    {
      "epoch": 2.8995568685376663,
      "grad_norm": 0.248603954911232,
      "learning_rate": 6.700332553270107e-06,
      "loss": 0.1441,
      "step": 7852
    },
    {
      "epoch": 2.899926144756278,
      "grad_norm": 0.23523470759391785,
      "learning_rate": 6.675698977706614e-06,
      "loss": 0.1565,
      "step": 7853
    },
    {
      "epoch": 2.9002954209748895,
      "grad_norm": 0.29151037335395813,
      "learning_rate": 6.651065402143121e-06,
      "loss": 0.1666,
      "step": 7854
    },
    {
      "epoch": 2.9006646971935006,
      "grad_norm": 0.2865246832370758,
      "learning_rate": 6.626431826579628e-06,
      "loss": 0.1907,
      "step": 7855
    },
    {
      "epoch": 2.901033973412112,
      "grad_norm": 0.29374998807907104,
      "learning_rate": 6.601798251016135e-06,
      "loss": 0.1568,
      "step": 7856
    },
    {
      "epoch": 2.901403249630724,
      "grad_norm": 0.23669621348381042,
      "learning_rate": 6.577164675452642e-06,
      "loss": 0.1433,
      "step": 7857
    },
    {
      "epoch": 2.9017725258493354,
      "grad_norm": 0.28078481554985046,
      "learning_rate": 6.552531099889149e-06,
      "loss": 0.1436,
      "step": 7858
    },
    {
      "epoch": 2.9021418020679466,
      "grad_norm": 0.24589957296848297,
      "learning_rate": 6.527897524325656e-06,
      "loss": 0.1601,
      "step": 7859
    },
    {
      "epoch": 2.902511078286558,
      "grad_norm": 0.2863220274448395,
      "learning_rate": 6.503263948762163e-06,
      "loss": 0.1764,
      "step": 7860
    },
    {
      "epoch": 2.90288035450517,
      "grad_norm": 0.29853659868240356,
      "learning_rate": 6.47863037319867e-06,
      "loss": 0.1868,
      "step": 7861
    },
    {
      "epoch": 2.9032496307237814,
      "grad_norm": 0.30052450299263,
      "learning_rate": 6.453996797635177e-06,
      "loss": 0.1709,
      "step": 7862
    },
    {
      "epoch": 2.903618906942393,
      "grad_norm": 0.34883439540863037,
      "learning_rate": 6.429363222071684e-06,
      "loss": 0.1976,
      "step": 7863
    },
    {
      "epoch": 2.9039881831610046,
      "grad_norm": 0.2708515226840973,
      "learning_rate": 6.404729646508191e-06,
      "loss": 0.1768,
      "step": 7864
    },
    {
      "epoch": 2.904357459379616,
      "grad_norm": 0.24446143209934235,
      "learning_rate": 6.380096070944698e-06,
      "loss": 0.144,
      "step": 7865
    },
    {
      "epoch": 2.9047267355982274,
      "grad_norm": 0.2536332309246063,
      "learning_rate": 6.355462495381205e-06,
      "loss": 0.1518,
      "step": 7866
    },
    {
      "epoch": 2.905096011816839,
      "grad_norm": 0.2545977532863617,
      "learning_rate": 6.330828919817712e-06,
      "loss": 0.1563,
      "step": 7867
    },
    {
      "epoch": 2.9054652880354506,
      "grad_norm": 0.3001493513584137,
      "learning_rate": 6.306195344254219e-06,
      "loss": 0.1725,
      "step": 7868
    },
    {
      "epoch": 2.905834564254062,
      "grad_norm": 0.34021979570388794,
      "learning_rate": 6.281561768690726e-06,
      "loss": 0.1953,
      "step": 7869
    },
    {
      "epoch": 2.9062038404726733,
      "grad_norm": 0.2688051760196686,
      "learning_rate": 6.256928193127233e-06,
      "loss": 0.1415,
      "step": 7870
    },
    {
      "epoch": 2.906573116691285,
      "grad_norm": 0.28454554080963135,
      "learning_rate": 6.232294617563739e-06,
      "loss": 0.1682,
      "step": 7871
    },
    {
      "epoch": 2.9069423929098965,
      "grad_norm": 0.2471422255039215,
      "learning_rate": 6.207661042000246e-06,
      "loss": 0.1544,
      "step": 7872
    },
    {
      "epoch": 2.907311669128508,
      "grad_norm": 0.29998016357421875,
      "learning_rate": 6.183027466436753e-06,
      "loss": 0.1577,
      "step": 7873
    },
    {
      "epoch": 2.9076809453471197,
      "grad_norm": 0.33225128054618835,
      "learning_rate": 6.15839389087326e-06,
      "loss": 0.1755,
      "step": 7874
    },
    {
      "epoch": 2.9080502215657313,
      "grad_norm": 0.3006001114845276,
      "learning_rate": 6.133760315309767e-06,
      "loss": 0.1706,
      "step": 7875
    },
    {
      "epoch": 2.908419497784343,
      "grad_norm": 0.2629902958869934,
      "learning_rate": 6.109126739746274e-06,
      "loss": 0.17,
      "step": 7876
    },
    {
      "epoch": 2.908788774002954,
      "grad_norm": 0.2456972301006317,
      "learning_rate": 6.084493164182781e-06,
      "loss": 0.1604,
      "step": 7877
    },
    {
      "epoch": 2.9091580502215657,
      "grad_norm": 0.283809632062912,
      "learning_rate": 6.059859588619288e-06,
      "loss": 0.1541,
      "step": 7878
    },
    {
      "epoch": 2.9095273264401773,
      "grad_norm": 0.2507307231426239,
      "learning_rate": 6.035226013055795e-06,
      "loss": 0.1523,
      "step": 7879
    },
    {
      "epoch": 2.909896602658789,
      "grad_norm": 0.25242727994918823,
      "learning_rate": 6.010592437492302e-06,
      "loss": 0.1497,
      "step": 7880
    },
    {
      "epoch": 2.9102658788774,
      "grad_norm": 0.27692046761512756,
      "learning_rate": 5.9859588619288094e-06,
      "loss": 0.1539,
      "step": 7881
    },
    {
      "epoch": 2.9106351550960117,
      "grad_norm": 0.26622235774993896,
      "learning_rate": 5.9613252863653164e-06,
      "loss": 0.1691,
      "step": 7882
    },
    {
      "epoch": 2.9110044313146233,
      "grad_norm": 0.21173381805419922,
      "learning_rate": 5.9366917108018235e-06,
      "loss": 0.1424,
      "step": 7883
    },
    {
      "epoch": 2.911373707533235,
      "grad_norm": 0.26915299892425537,
      "learning_rate": 5.9120581352383305e-06,
      "loss": 0.1662,
      "step": 7884
    },
    {
      "epoch": 2.9117429837518465,
      "grad_norm": 0.28042080998420715,
      "learning_rate": 5.887424559674837e-06,
      "loss": 0.1239,
      "step": 7885
    },
    {
      "epoch": 2.912112259970458,
      "grad_norm": 0.2873269319534302,
      "learning_rate": 5.862790984111344e-06,
      "loss": 0.169,
      "step": 7886
    },
    {
      "epoch": 2.9124815361890697,
      "grad_norm": 0.2641640603542328,
      "learning_rate": 5.838157408547851e-06,
      "loss": 0.1602,
      "step": 7887
    },
    {
      "epoch": 2.912850812407681,
      "grad_norm": 0.288686603307724,
      "learning_rate": 5.813523832984358e-06,
      "loss": 0.1896,
      "step": 7888
    },
    {
      "epoch": 2.9132200886262924,
      "grad_norm": 0.2504146099090576,
      "learning_rate": 5.788890257420865e-06,
      "loss": 0.1641,
      "step": 7889
    },
    {
      "epoch": 2.913589364844904,
      "grad_norm": 0.2569611966609955,
      "learning_rate": 5.764256681857372e-06,
      "loss": 0.1658,
      "step": 7890
    },
    {
      "epoch": 2.9139586410635157,
      "grad_norm": 0.24803724884986877,
      "learning_rate": 5.739623106293879e-06,
      "loss": 0.1536,
      "step": 7891
    },
    {
      "epoch": 2.914327917282127,
      "grad_norm": 0.2211741954088211,
      "learning_rate": 5.714989530730386e-06,
      "loss": 0.1344,
      "step": 7892
    },
    {
      "epoch": 2.9146971935007384,
      "grad_norm": 0.3000262975692749,
      "learning_rate": 5.690355955166893e-06,
      "loss": 0.174,
      "step": 7893
    },
    {
      "epoch": 2.91506646971935,
      "grad_norm": 0.2736184298992157,
      "learning_rate": 5.6657223796034e-06,
      "loss": 0.1526,
      "step": 7894
    },
    {
      "epoch": 2.9154357459379616,
      "grad_norm": 0.30402082204818726,
      "learning_rate": 5.641088804039907e-06,
      "loss": 0.1604,
      "step": 7895
    },
    {
      "epoch": 2.9158050221565732,
      "grad_norm": 0.3075074851512909,
      "learning_rate": 5.616455228476414e-06,
      "loss": 0.1956,
      "step": 7896
    },
    {
      "epoch": 2.916174298375185,
      "grad_norm": 0.27981919050216675,
      "learning_rate": 5.591821652912921e-06,
      "loss": 0.1562,
      "step": 7897
    },
    {
      "epoch": 2.9165435745937964,
      "grad_norm": 0.2536432445049286,
      "learning_rate": 5.567188077349428e-06,
      "loss": 0.1686,
      "step": 7898
    },
    {
      "epoch": 2.9169128508124076,
      "grad_norm": 0.23276817798614502,
      "learning_rate": 5.542554501785935e-06,
      "loss": 0.1457,
      "step": 7899
    },
    {
      "epoch": 2.917282127031019,
      "grad_norm": 0.31259551644325256,
      "learning_rate": 5.517920926222441e-06,
      "loss": 0.1826,
      "step": 7900
    },
    {
      "epoch": 2.917282127031019,
      "eval_loss": 0.24526342749595642,
      "eval_runtime": 5.8635,
      "eval_samples_per_second": 8.527,
      "eval_steps_per_second": 1.194,
      "step": 7900
    },
    {
      "epoch": 2.917651403249631,
      "grad_norm": 0.25709274411201477,
      "learning_rate": 5.493287350658948e-06,
      "loss": 0.1402,
      "step": 7901
    },
    {
      "epoch": 2.9180206794682424,
      "grad_norm": 0.29496487975120544,
      "learning_rate": 5.468653775095455e-06,
      "loss": 0.2003,
      "step": 7902
    },
    {
      "epoch": 2.9183899556868536,
      "grad_norm": 0.26852312684059143,
      "learning_rate": 5.444020199531962e-06,
      "loss": 0.1718,
      "step": 7903
    },
    {
      "epoch": 2.918759231905465,
      "grad_norm": 0.24652555584907532,
      "learning_rate": 5.419386623968469e-06,
      "loss": 0.1455,
      "step": 7904
    },
    {
      "epoch": 2.9191285081240768,
      "grad_norm": 0.2851690649986267,
      "learning_rate": 5.394753048404976e-06,
      "loss": 0.1607,
      "step": 7905
    },
    {
      "epoch": 2.9194977843426884,
      "grad_norm": 0.26648518443107605,
      "learning_rate": 5.370119472841483e-06,
      "loss": 0.1223,
      "step": 7906
    },
    {
      "epoch": 2.9198670605613,
      "grad_norm": 0.2626606523990631,
      "learning_rate": 5.34548589727799e-06,
      "loss": 0.1591,
      "step": 7907
    },
    {
      "epoch": 2.9202363367799116,
      "grad_norm": 0.2880263030529022,
      "learning_rate": 5.320852321714497e-06,
      "loss": 0.1542,
      "step": 7908
    },
    {
      "epoch": 2.920605612998523,
      "grad_norm": 0.2149953842163086,
      "learning_rate": 5.296218746151004e-06,
      "loss": 0.1494,
      "step": 7909
    },
    {
      "epoch": 2.9209748892171343,
      "grad_norm": 0.26451849937438965,
      "learning_rate": 5.271585170587511e-06,
      "loss": 0.1557,
      "step": 7910
    },
    {
      "epoch": 2.921344165435746,
      "grad_norm": 0.28757619857788086,
      "learning_rate": 5.246951595024018e-06,
      "loss": 0.1918,
      "step": 7911
    },
    {
      "epoch": 2.9217134416543575,
      "grad_norm": 0.24943974614143372,
      "learning_rate": 5.222318019460525e-06,
      "loss": 0.1522,
      "step": 7912
    },
    {
      "epoch": 2.922082717872969,
      "grad_norm": 0.2756868004798889,
      "learning_rate": 5.197684443897032e-06,
      "loss": 0.166,
      "step": 7913
    },
    {
      "epoch": 2.9224519940915803,
      "grad_norm": 0.2607571482658386,
      "learning_rate": 5.1730508683335384e-06,
      "loss": 0.1528,
      "step": 7914
    },
    {
      "epoch": 2.922821270310192,
      "grad_norm": 0.28976476192474365,
      "learning_rate": 5.1484172927700454e-06,
      "loss": 0.1536,
      "step": 7915
    },
    {
      "epoch": 2.9231905465288035,
      "grad_norm": 0.29689449071884155,
      "learning_rate": 5.1237837172065525e-06,
      "loss": 0.1605,
      "step": 7916
    },
    {
      "epoch": 2.923559822747415,
      "grad_norm": 0.2582589387893677,
      "learning_rate": 5.0991501416430595e-06,
      "loss": 0.1748,
      "step": 7917
    },
    {
      "epoch": 2.9239290989660267,
      "grad_norm": 0.2462097406387329,
      "learning_rate": 5.0745165660795665e-06,
      "loss": 0.1457,
      "step": 7918
    },
    {
      "epoch": 2.9242983751846383,
      "grad_norm": 0.27874815464019775,
      "learning_rate": 5.0498829905160735e-06,
      "loss": 0.1626,
      "step": 7919
    },
    {
      "epoch": 2.9246676514032495,
      "grad_norm": 0.24357378482818604,
      "learning_rate": 5.0252494149525805e-06,
      "loss": 0.1652,
      "step": 7920
    },
    {
      "epoch": 2.925036927621861,
      "grad_norm": 0.35426661372184753,
      "learning_rate": 5.0006158393890875e-06,
      "loss": 0.1592,
      "step": 7921
    },
    {
      "epoch": 2.9254062038404727,
      "grad_norm": 0.2852160632610321,
      "learning_rate": 4.9759822638255946e-06,
      "loss": 0.1733,
      "step": 7922
    },
    {
      "epoch": 2.9257754800590843,
      "grad_norm": 0.2774888873100281,
      "learning_rate": 4.9513486882621016e-06,
      "loss": 0.1716,
      "step": 7923
    },
    {
      "epoch": 2.926144756277696,
      "grad_norm": 0.2971727252006531,
      "learning_rate": 4.926715112698609e-06,
      "loss": 0.1473,
      "step": 7924
    },
    {
      "epoch": 2.926514032496307,
      "grad_norm": 0.25789374113082886,
      "learning_rate": 4.902081537135116e-06,
      "loss": 0.1656,
      "step": 7925
    },
    {
      "epoch": 2.9268833087149186,
      "grad_norm": 0.28932541608810425,
      "learning_rate": 4.877447961571623e-06,
      "loss": 0.1701,
      "step": 7926
    },
    {
      "epoch": 2.9272525849335302,
      "grad_norm": 0.30476686358451843,
      "learning_rate": 4.85281438600813e-06,
      "loss": 0.1594,
      "step": 7927
    },
    {
      "epoch": 2.927621861152142,
      "grad_norm": 0.2554304003715515,
      "learning_rate": 4.828180810444637e-06,
      "loss": 0.1516,
      "step": 7928
    },
    {
      "epoch": 2.9279911373707534,
      "grad_norm": 0.3235216438770294,
      "learning_rate": 4.803547234881143e-06,
      "loss": 0.1785,
      "step": 7929
    },
    {
      "epoch": 2.928360413589365,
      "grad_norm": 0.26630640029907227,
      "learning_rate": 4.77891365931765e-06,
      "loss": 0.1499,
      "step": 7930
    },
    {
      "epoch": 2.928729689807976,
      "grad_norm": 0.2978435754776001,
      "learning_rate": 4.754280083754157e-06,
      "loss": 0.1697,
      "step": 7931
    },
    {
      "epoch": 2.929098966026588,
      "grad_norm": 0.2978525161743164,
      "learning_rate": 4.729646508190664e-06,
      "loss": 0.1674,
      "step": 7932
    },
    {
      "epoch": 2.9294682422451994,
      "grad_norm": 0.3201218247413635,
      "learning_rate": 4.705012932627171e-06,
      "loss": 0.201,
      "step": 7933
    },
    {
      "epoch": 2.929837518463811,
      "grad_norm": 0.27196231484413147,
      "learning_rate": 4.680379357063678e-06,
      "loss": 0.162,
      "step": 7934
    },
    {
      "epoch": 2.930206794682422,
      "grad_norm": 0.28881552815437317,
      "learning_rate": 4.655745781500185e-06,
      "loss": 0.1804,
      "step": 7935
    },
    {
      "epoch": 2.930576070901034,
      "grad_norm": 0.2389811873435974,
      "learning_rate": 4.631112205936692e-06,
      "loss": 0.158,
      "step": 7936
    },
    {
      "epoch": 2.9309453471196454,
      "grad_norm": 0.25321751832962036,
      "learning_rate": 4.606478630373199e-06,
      "loss": 0.1604,
      "step": 7937
    },
    {
      "epoch": 2.931314623338257,
      "grad_norm": 0.27187028527259827,
      "learning_rate": 4.581845054809706e-06,
      "loss": 0.178,
      "step": 7938
    },
    {
      "epoch": 2.9316838995568686,
      "grad_norm": 0.2438676357269287,
      "learning_rate": 4.557211479246213e-06,
      "loss": 0.1566,
      "step": 7939
    },
    {
      "epoch": 2.93205317577548,
      "grad_norm": 0.2593154013156891,
      "learning_rate": 4.53257790368272e-06,
      "loss": 0.1763,
      "step": 7940
    },
    {
      "epoch": 2.932422451994092,
      "grad_norm": 0.2560650408267975,
      "learning_rate": 4.507944328119227e-06,
      "loss": 0.145,
      "step": 7941
    },
    {
      "epoch": 2.932791728212703,
      "grad_norm": 0.25635337829589844,
      "learning_rate": 4.483310752555734e-06,
      "loss": 0.1524,
      "step": 7942
    },
    {
      "epoch": 2.9331610044313146,
      "grad_norm": 0.23034490644931793,
      "learning_rate": 4.45867717699224e-06,
      "loss": 0.1479,
      "step": 7943
    },
    {
      "epoch": 2.933530280649926,
      "grad_norm": 0.25828734040260315,
      "learning_rate": 4.434043601428747e-06,
      "loss": 0.1819,
      "step": 7944
    },
    {
      "epoch": 2.9338995568685378,
      "grad_norm": 0.30721622705459595,
      "learning_rate": 4.409410025865254e-06,
      "loss": 0.1782,
      "step": 7945
    },
    {
      "epoch": 2.934268833087149,
      "grad_norm": 0.2951761484146118,
      "learning_rate": 4.384776450301761e-06,
      "loss": 0.1616,
      "step": 7946
    },
    {
      "epoch": 2.9346381093057605,
      "grad_norm": 0.2366657704114914,
      "learning_rate": 4.360142874738268e-06,
      "loss": 0.1546,
      "step": 7947
    },
    {
      "epoch": 2.935007385524372,
      "grad_norm": 0.30123084783554077,
      "learning_rate": 4.335509299174775e-06,
      "loss": 0.2079,
      "step": 7948
    },
    {
      "epoch": 2.9353766617429837,
      "grad_norm": 0.2627218961715698,
      "learning_rate": 4.310875723611282e-06,
      "loss": 0.1651,
      "step": 7949
    },
    {
      "epoch": 2.9357459379615953,
      "grad_norm": 0.30095189809799194,
      "learning_rate": 4.286242148047789e-06,
      "loss": 0.1615,
      "step": 7950
    },
    {
      "epoch": 2.9357459379615953,
      "eval_loss": 0.24511057138442993,
      "eval_runtime": 5.8646,
      "eval_samples_per_second": 8.526,
      "eval_steps_per_second": 1.194,
      "step": 7950
    },
    {
      "epoch": 2.936115214180207,
      "grad_norm": 0.320188969373703,
      "learning_rate": 4.261608572484296e-06,
      "loss": 0.1659,
      "step": 7951
    },
    {
      "epoch": 2.9364844903988185,
      "grad_norm": 0.25572508573532104,
      "learning_rate": 4.236974996920803e-06,
      "loss": 0.1477,
      "step": 7952
    },
    {
      "epoch": 2.9368537666174297,
      "grad_norm": 0.27010729908943176,
      "learning_rate": 4.21234142135731e-06,
      "loss": 0.1732,
      "step": 7953
    },
    {
      "epoch": 2.9372230428360413,
      "grad_norm": 0.32194846868515015,
      "learning_rate": 4.187707845793817e-06,
      "loss": 0.159,
      "step": 7954
    },
    {
      "epoch": 2.937592319054653,
      "grad_norm": 0.2527812123298645,
      "learning_rate": 4.163074270230324e-06,
      "loss": 0.1726,
      "step": 7955
    },
    {
      "epoch": 2.9379615952732645,
      "grad_norm": 0.2795100808143616,
      "learning_rate": 4.138440694666831e-06,
      "loss": 0.1746,
      "step": 7956
    },
    {
      "epoch": 2.9383308714918757,
      "grad_norm": 0.2902645766735077,
      "learning_rate": 4.1138071191033384e-06,
      "loss": 0.1647,
      "step": 7957
    },
    {
      "epoch": 2.9387001477104873,
      "grad_norm": 0.27305081486701965,
      "learning_rate": 4.089173543539845e-06,
      "loss": 0.1497,
      "step": 7958
    },
    {
      "epoch": 2.939069423929099,
      "grad_norm": 0.2554760277271271,
      "learning_rate": 4.064539967976352e-06,
      "loss": 0.1392,
      "step": 7959
    },
    {
      "epoch": 2.9394387001477105,
      "grad_norm": 0.25168025493621826,
      "learning_rate": 4.039906392412859e-06,
      "loss": 0.1522,
      "step": 7960
    },
    {
      "epoch": 2.939807976366322,
      "grad_norm": 0.22683395445346832,
      "learning_rate": 4.015272816849366e-06,
      "loss": 0.139,
      "step": 7961
    },
    {
      "epoch": 2.9401772525849337,
      "grad_norm": 0.2923617959022522,
      "learning_rate": 3.990639241285873e-06,
      "loss": 0.1802,
      "step": 7962
    },
    {
      "epoch": 2.9405465288035453,
      "grad_norm": 0.2750331163406372,
      "learning_rate": 3.96600566572238e-06,
      "loss": 0.1759,
      "step": 7963
    },
    {
      "epoch": 2.9409158050221564,
      "grad_norm": 0.3098607063293457,
      "learning_rate": 3.941372090158887e-06,
      "loss": 0.1703,
      "step": 7964
    },
    {
      "epoch": 2.941285081240768,
      "grad_norm": 0.2954683303833008,
      "learning_rate": 3.916738514595394e-06,
      "loss": 0.1844,
      "step": 7965
    },
    {
      "epoch": 2.9416543574593796,
      "grad_norm": 0.3084189295768738,
      "learning_rate": 3.892104939031901e-06,
      "loss": 0.165,
      "step": 7966
    },
    {
      "epoch": 2.9420236336779912,
      "grad_norm": 0.24966798722743988,
      "learning_rate": 3.867471363468408e-06,
      "loss": 0.1672,
      "step": 7967
    },
    {
      "epoch": 2.9423929098966024,
      "grad_norm": 0.2882799804210663,
      "learning_rate": 3.842837787904915e-06,
      "loss": 0.1438,
      "step": 7968
    },
    {
      "epoch": 2.942762186115214,
      "grad_norm": 0.2638078033924103,
      "learning_rate": 3.818204212341422e-06,
      "loss": 0.1667,
      "step": 7969
    },
    {
      "epoch": 2.9431314623338256,
      "grad_norm": 0.24424761533737183,
      "learning_rate": 3.793570636777929e-06,
      "loss": 0.1593,
      "step": 7970
    },
    {
      "epoch": 2.943500738552437,
      "grad_norm": 0.29140397906303406,
      "learning_rate": 3.768937061214436e-06,
      "loss": 0.1558,
      "step": 7971
    },
    {
      "epoch": 2.943870014771049,
      "grad_norm": 0.23234650492668152,
      "learning_rate": 3.744303485650942e-06,
      "loss": 0.1472,
      "step": 7972
    },
    {
      "epoch": 2.9442392909896604,
      "grad_norm": 0.2452017366886139,
      "learning_rate": 3.719669910087449e-06,
      "loss": 0.1529,
      "step": 7973
    },
    {
      "epoch": 2.944608567208272,
      "grad_norm": 0.255310982465744,
      "learning_rate": 3.695036334523956e-06,
      "loss": 0.1496,
      "step": 7974
    },
    {
      "epoch": 2.944977843426883,
      "grad_norm": 0.24070237576961517,
      "learning_rate": 3.670402758960463e-06,
      "loss": 0.1656,
      "step": 7975
    },
    {
      "epoch": 2.945347119645495,
      "grad_norm": 0.3284929096698761,
      "learning_rate": 3.64576918339697e-06,
      "loss": 0.1964,
      "step": 7976
    },
    {
      "epoch": 2.9457163958641064,
      "grad_norm": 0.28253570199012756,
      "learning_rate": 3.621135607833477e-06,
      "loss": 0.1747,
      "step": 7977
    },
    {
      "epoch": 2.946085672082718,
      "grad_norm": 0.3123980760574341,
      "learning_rate": 3.596502032269984e-06,
      "loss": 0.1751,
      "step": 7978
    },
    {
      "epoch": 2.946454948301329,
      "grad_norm": 0.2887688875198364,
      "learning_rate": 3.571868456706491e-06,
      "loss": 0.1769,
      "step": 7979
    },
    {
      "epoch": 2.9468242245199407,
      "grad_norm": 0.2751558721065521,
      "learning_rate": 3.547234881142998e-06,
      "loss": 0.1515,
      "step": 7980
    },
    {
      "epoch": 2.9471935007385524,
      "grad_norm": 0.24743711948394775,
      "learning_rate": 3.522601305579505e-06,
      "loss": 0.1494,
      "step": 7981
    },
    {
      "epoch": 2.947562776957164,
      "grad_norm": 0.2867875099182129,
      "learning_rate": 3.497967730016012e-06,
      "loss": 0.1838,
      "step": 7982
    },
    {
      "epoch": 2.9479320531757756,
      "grad_norm": 0.29121991991996765,
      "learning_rate": 3.473334154452519e-06,
      "loss": 0.1603,
      "step": 7983
    },
    {
      "epoch": 2.948301329394387,
      "grad_norm": 0.3091704547405243,
      "learning_rate": 3.448700578889026e-06,
      "loss": 0.1553,
      "step": 7984
    },
    {
      "epoch": 2.9486706056129988,
      "grad_norm": 0.3139994740486145,
      "learning_rate": 3.424067003325533e-06,
      "loss": 0.1502,
      "step": 7985
    },
    {
      "epoch": 2.94903988183161,
      "grad_norm": 0.25884008407592773,
      "learning_rate": 3.3994334277620402e-06,
      "loss": 0.1555,
      "step": 7986
    },
    {
      "epoch": 2.9494091580502215,
      "grad_norm": 0.2872859537601471,
      "learning_rate": 3.3747998521985464e-06,
      "loss": 0.1579,
      "step": 7987
    },
    {
      "epoch": 2.949778434268833,
      "grad_norm": 0.258348286151886,
      "learning_rate": 3.3501662766350534e-06,
      "loss": 0.1594,
      "step": 7988
    },
    {
      "epoch": 2.9501477104874447,
      "grad_norm": 0.26962390542030334,
      "learning_rate": 3.3255327010715604e-06,
      "loss": 0.162,
      "step": 7989
    },
    {
      "epoch": 2.950516986706056,
      "grad_norm": 0.23087744414806366,
      "learning_rate": 3.3008991255080674e-06,
      "loss": 0.1421,
      "step": 7990
    },
    {
      "epoch": 2.9508862629246675,
      "grad_norm": 0.31266406178474426,
      "learning_rate": 3.2762655499445745e-06,
      "loss": 0.194,
      "step": 7991
    },
    {
      "epoch": 2.951255539143279,
      "grad_norm": 0.26265013217926025,
      "learning_rate": 3.2516319743810815e-06,
      "loss": 0.1632,
      "step": 7992
    },
    {
      "epoch": 2.9516248153618907,
      "grad_norm": 0.2959122657775879,
      "learning_rate": 3.2269983988175885e-06,
      "loss": 0.1748,
      "step": 7993
    },
    {
      "epoch": 2.9519940915805023,
      "grad_norm": 0.23374541103839874,
      "learning_rate": 3.2023648232540955e-06,
      "loss": 0.1464,
      "step": 7994
    },
    {
      "epoch": 2.952363367799114,
      "grad_norm": 0.26890239119529724,
      "learning_rate": 3.1777312476906025e-06,
      "loss": 0.1401,
      "step": 7995
    },
    {
      "epoch": 2.9527326440177255,
      "grad_norm": 0.2581759989261627,
      "learning_rate": 3.1530976721271095e-06,
      "loss": 0.1599,
      "step": 7996
    },
    {
      "epoch": 2.9531019202363367,
      "grad_norm": 0.32330840826034546,
      "learning_rate": 3.1284640965636165e-06,
      "loss": 0.1658,
      "step": 7997
    },
    {
      "epoch": 2.9534711964549483,
      "grad_norm": 0.3487650752067566,
      "learning_rate": 3.103830521000123e-06,
      "loss": 0.1904,
      "step": 7998
    },
    {
      "epoch": 2.95384047267356,
      "grad_norm": 0.27576979994773865,
      "learning_rate": 3.07919694543663e-06,
      "loss": 0.1683,
      "step": 7999
    },
    {
      "epoch": 2.9542097488921715,
      "grad_norm": 0.2911146283149719,
      "learning_rate": 3.054563369873137e-06,
      "loss": 0.1644,
      "step": 8000
    },
    {
      "epoch": 2.9542097488921715,
      "eval_loss": 0.24570205807685852,
      "eval_runtime": 5.8577,
      "eval_samples_per_second": 8.536,
      "eval_steps_per_second": 1.195,
      "step": 8000
    },
    {
      "epoch": 2.9545790251107826,
      "grad_norm": 0.26587924361228943,
      "learning_rate": 3.029929794309644e-06,
      "loss": 0.1499,
      "step": 8001
    },
    {
      "epoch": 2.9549483013293942,
      "grad_norm": 0.2555754780769348,
      "learning_rate": 3.005296218746151e-06,
      "loss": 0.1484,
      "step": 8002
    },
    {
      "epoch": 2.955317577548006,
      "grad_norm": 0.2865438163280487,
      "learning_rate": 2.9806626431826582e-06,
      "loss": 0.1627,
      "step": 8003
    },
    {
      "epoch": 2.9556868537666174,
      "grad_norm": 0.2447013258934021,
      "learning_rate": 2.9560290676191652e-06,
      "loss": 0.1359,
      "step": 8004
    },
    {
      "epoch": 2.956056129985229,
      "grad_norm": 0.30044007301330566,
      "learning_rate": 2.931395492055672e-06,
      "loss": 0.1935,
      "step": 8005
    },
    {
      "epoch": 2.9564254062038406,
      "grad_norm": 0.26513710618019104,
      "learning_rate": 2.906761916492179e-06,
      "loss": 0.1506,
      "step": 8006
    },
    {
      "epoch": 2.9567946824224522,
      "grad_norm": 0.24062731862068176,
      "learning_rate": 2.882128340928686e-06,
      "loss": 0.1558,
      "step": 8007
    },
    {
      "epoch": 2.9571639586410634,
      "grad_norm": 0.2982660233974457,
      "learning_rate": 2.857494765365193e-06,
      "loss": 0.1877,
      "step": 8008
    },
    {
      "epoch": 2.957533234859675,
      "grad_norm": 0.2812516987323761,
      "learning_rate": 2.8328611898017e-06,
      "loss": 0.1619,
      "step": 8009
    },
    {
      "epoch": 2.9579025110782866,
      "grad_norm": 0.23445898294448853,
      "learning_rate": 2.808227614238207e-06,
      "loss": 0.1538,
      "step": 8010
    },
    {
      "epoch": 2.958271787296898,
      "grad_norm": 0.2310696393251419,
      "learning_rate": 2.783594038674714e-06,
      "loss": 0.159,
      "step": 8011
    },
    {
      "epoch": 2.9586410635155094,
      "grad_norm": 0.24844327569007874,
      "learning_rate": 2.7589604631112205e-06,
      "loss": 0.158,
      "step": 8012
    },
    {
      "epoch": 2.959010339734121,
      "grad_norm": 0.24663670361042023,
      "learning_rate": 2.7343268875477275e-06,
      "loss": 0.1501,
      "step": 8013
    },
    {
      "epoch": 2.9593796159527326,
      "grad_norm": 0.2664964497089386,
      "learning_rate": 2.7096933119842346e-06,
      "loss": 0.1495,
      "step": 8014
    },
    {
      "epoch": 2.959748892171344,
      "grad_norm": 0.2691749632358551,
      "learning_rate": 2.6850597364207416e-06,
      "loss": 0.1565,
      "step": 8015
    },
    {
      "epoch": 2.960118168389956,
      "grad_norm": 0.31154337525367737,
      "learning_rate": 2.6604261608572486e-06,
      "loss": 0.1707,
      "step": 8016
    },
    {
      "epoch": 2.9604874446085674,
      "grad_norm": 0.2657756209373474,
      "learning_rate": 2.6357925852937556e-06,
      "loss": 0.1654,
      "step": 8017
    },
    {
      "epoch": 2.960856720827179,
      "grad_norm": 0.3763958811759949,
      "learning_rate": 2.6111590097302626e-06,
      "loss": 0.1668,
      "step": 8018
    },
    {
      "epoch": 2.96122599704579,
      "grad_norm": 0.3201150596141815,
      "learning_rate": 2.5865254341667692e-06,
      "loss": 0.1882,
      "step": 8019
    },
    {
      "epoch": 2.9615952732644018,
      "grad_norm": 0.269149512052536,
      "learning_rate": 2.5618918586032762e-06,
      "loss": 0.1537,
      "step": 8020
    },
    {
      "epoch": 2.9619645494830134,
      "grad_norm": 0.23237310349941254,
      "learning_rate": 2.5372582830397832e-06,
      "loss": 0.1524,
      "step": 8021
    },
    {
      "epoch": 2.962333825701625,
      "grad_norm": 0.24856051802635193,
      "learning_rate": 2.5126247074762903e-06,
      "loss": 0.1653,
      "step": 8022
    },
    {
      "epoch": 2.962703101920236,
      "grad_norm": 0.23548021912574768,
      "learning_rate": 2.4879911319127973e-06,
      "loss": 0.1574,
      "step": 8023
    },
    {
      "epoch": 2.9630723781388477,
      "grad_norm": 0.21869832277297974,
      "learning_rate": 2.4633575563493043e-06,
      "loss": 0.1356,
      "step": 8024
    },
    {
      "epoch": 2.9634416543574593,
      "grad_norm": 0.290967732667923,
      "learning_rate": 2.4387239807858113e-06,
      "loss": 0.1619,
      "step": 8025
    },
    {
      "epoch": 2.963810930576071,
      "grad_norm": 0.3094252645969391,
      "learning_rate": 2.4140904052223183e-06,
      "loss": 0.1964,
      "step": 8026
    },
    {
      "epoch": 2.9641802067946825,
      "grad_norm": 0.3074129521846771,
      "learning_rate": 2.389456829658825e-06,
      "loss": 0.1939,
      "step": 8027
    },
    {
      "epoch": 2.964549483013294,
      "grad_norm": 0.2699299156665802,
      "learning_rate": 2.364823254095332e-06,
      "loss": 0.1629,
      "step": 8028
    },
    {
      "epoch": 2.9649187592319057,
      "grad_norm": 0.267343133687973,
      "learning_rate": 2.340189678531839e-06,
      "loss": 0.1527,
      "step": 8029
    },
    {
      "epoch": 2.965288035450517,
      "grad_norm": 0.2700563669204712,
      "learning_rate": 2.315556102968346e-06,
      "loss": 0.1518,
      "step": 8030
    },
    {
      "epoch": 2.9656573116691285,
      "grad_norm": 0.21548229455947876,
      "learning_rate": 2.290922527404853e-06,
      "loss": 0.1378,
      "step": 8031
    },
    {
      "epoch": 2.96602658788774,
      "grad_norm": 0.2287013977766037,
      "learning_rate": 2.26628895184136e-06,
      "loss": 0.154,
      "step": 8032
    },
    {
      "epoch": 2.9663958641063517,
      "grad_norm": 0.30055034160614014,
      "learning_rate": 2.241655376277867e-06,
      "loss": 0.1648,
      "step": 8033
    },
    {
      "epoch": 2.966765140324963,
      "grad_norm": 0.2905229330062866,
      "learning_rate": 2.2170218007143736e-06,
      "loss": 0.178,
      "step": 8034
    },
    {
      "epoch": 2.9671344165435745,
      "grad_norm": 0.32654109597206116,
      "learning_rate": 2.1923882251508806e-06,
      "loss": 0.1646,
      "step": 8035
    },
    {
      "epoch": 2.967503692762186,
      "grad_norm": 0.3208484947681427,
      "learning_rate": 2.1677546495873876e-06,
      "loss": 0.1558,
      "step": 8036
    },
    {
      "epoch": 2.9678729689807977,
      "grad_norm": 0.3065069019794464,
      "learning_rate": 2.1431210740238947e-06,
      "loss": 0.1592,
      "step": 8037
    },
    {
      "epoch": 2.9682422451994093,
      "grad_norm": 0.2624164819717407,
      "learning_rate": 2.1184874984604017e-06,
      "loss": 0.1423,
      "step": 8038
    },
    {
      "epoch": 2.968611521418021,
      "grad_norm": 0.2898831069469452,
      "learning_rate": 2.0938539228969087e-06,
      "loss": 0.1625,
      "step": 8039
    },
    {
      "epoch": 2.9689807976366325,
      "grad_norm": 0.3115295171737671,
      "learning_rate": 2.0692203473334157e-06,
      "loss": 0.1741,
      "step": 8040
    },
    {
      "epoch": 2.9693500738552436,
      "grad_norm": 0.23843203485012054,
      "learning_rate": 2.0445867717699223e-06,
      "loss": 0.1444,
      "step": 8041
    },
    {
      "epoch": 2.9697193500738552,
      "grad_norm": 0.266390860080719,
      "learning_rate": 2.0199531962064293e-06,
      "loss": 0.1737,
      "step": 8042
    },
    {
      "epoch": 2.970088626292467,
      "grad_norm": 0.2281162291765213,
      "learning_rate": 1.9953196206429363e-06,
      "loss": 0.1385,
      "step": 8043
    },
    {
      "epoch": 2.9704579025110784,
      "grad_norm": 0.3282414972782135,
      "learning_rate": 1.9706860450794434e-06,
      "loss": 0.1923,
      "step": 8044
    },
    {
      "epoch": 2.9708271787296896,
      "grad_norm": 0.33279144763946533,
      "learning_rate": 1.9460524695159504e-06,
      "loss": 0.1817,
      "step": 8045
    },
    {
      "epoch": 2.971196454948301,
      "grad_norm": 0.26364079117774963,
      "learning_rate": 1.9214188939524574e-06,
      "loss": 0.1515,
      "step": 8046
    },
    {
      "epoch": 2.971565731166913,
      "grad_norm": 0.28969061374664307,
      "learning_rate": 1.8967853183889644e-06,
      "loss": 0.1623,
      "step": 8047
    },
    {
      "epoch": 2.9719350073855244,
      "grad_norm": 0.28264951705932617,
      "learning_rate": 1.872151742825471e-06,
      "loss": 0.1681,
      "step": 8048
    },
    {
      "epoch": 2.972304283604136,
      "grad_norm": 0.25010421872138977,
      "learning_rate": 1.847518167261978e-06,
      "loss": 0.1468,
      "step": 8049
    },
    {
      "epoch": 2.9726735598227476,
      "grad_norm": 0.31697016954421997,
      "learning_rate": 1.822884591698485e-06,
      "loss": 0.171,
      "step": 8050
    },
    {
      "epoch": 2.9726735598227476,
      "eval_loss": 0.2450605034828186,
      "eval_runtime": 5.8673,
      "eval_samples_per_second": 8.522,
      "eval_steps_per_second": 1.193,
      "step": 8050
    },
    {
      "epoch": 2.9730428360413588,
      "grad_norm": 0.24555766582489014,
      "learning_rate": 1.798251016134992e-06,
      "loss": 0.1479,
      "step": 8051
    },
    {
      "epoch": 2.9734121122599704,
      "grad_norm": 0.2706266939640045,
      "learning_rate": 1.773617440571499e-06,
      "loss": 0.1783,
      "step": 8052
    },
    {
      "epoch": 2.973781388478582,
      "grad_norm": 0.313739538192749,
      "learning_rate": 1.748983865008006e-06,
      "loss": 0.174,
      "step": 8053
    },
    {
      "epoch": 2.9741506646971936,
      "grad_norm": 0.30564966797828674,
      "learning_rate": 1.724350289444513e-06,
      "loss": 0.181,
      "step": 8054
    },
    {
      "epoch": 2.974519940915805,
      "grad_norm": 0.27199769020080566,
      "learning_rate": 1.6997167138810201e-06,
      "loss": 0.1927,
      "step": 8055
    },
    {
      "epoch": 2.9748892171344163,
      "grad_norm": 0.27665677666664124,
      "learning_rate": 1.6750831383175267e-06,
      "loss": 0.1662,
      "step": 8056
    },
    {
      "epoch": 2.975258493353028,
      "grad_norm": 0.28751057386398315,
      "learning_rate": 1.6504495627540337e-06,
      "loss": 0.1612,
      "step": 8057
    },
    {
      "epoch": 2.9756277695716395,
      "grad_norm": 0.27947041392326355,
      "learning_rate": 1.6258159871905407e-06,
      "loss": 0.1823,
      "step": 8058
    },
    {
      "epoch": 2.975997045790251,
      "grad_norm": 0.2704101502895355,
      "learning_rate": 1.6011824116270478e-06,
      "loss": 0.1662,
      "step": 8059
    },
    {
      "epoch": 2.9763663220088628,
      "grad_norm": 0.23133864998817444,
      "learning_rate": 1.5765488360635548e-06,
      "loss": 0.1445,
      "step": 8060
    },
    {
      "epoch": 2.9767355982274744,
      "grad_norm": 0.258308082818985,
      "learning_rate": 1.5519152605000616e-06,
      "loss": 0.167,
      "step": 8061
    },
    {
      "epoch": 2.9771048744460855,
      "grad_norm": 0.2986674904823303,
      "learning_rate": 1.5272816849365686e-06,
      "loss": 0.1473,
      "step": 8062
    },
    {
      "epoch": 2.977474150664697,
      "grad_norm": 0.283969521522522,
      "learning_rate": 1.5026481093730756e-06,
      "loss": 0.1635,
      "step": 8063
    },
    {
      "epoch": 2.9778434268833087,
      "grad_norm": 0.21613147854804993,
      "learning_rate": 1.4780145338095826e-06,
      "loss": 0.1524,
      "step": 8064
    },
    {
      "epoch": 2.9782127031019203,
      "grad_norm": 0.23958474397659302,
      "learning_rate": 1.4533809582460894e-06,
      "loss": 0.1532,
      "step": 8065
    },
    {
      "epoch": 2.9785819793205315,
      "grad_norm": 0.23401552438735962,
      "learning_rate": 1.4287473826825964e-06,
      "loss": 0.1561,
      "step": 8066
    },
    {
      "epoch": 2.978951255539143,
      "grad_norm": 0.31438690423965454,
      "learning_rate": 1.4041138071191035e-06,
      "loss": 0.1793,
      "step": 8067
    },
    {
      "epoch": 2.9793205317577547,
      "grad_norm": 0.2550419569015503,
      "learning_rate": 1.3794802315556103e-06,
      "loss": 0.1614,
      "step": 8068
    },
    {
      "epoch": 2.9796898079763663,
      "grad_norm": 0.260769248008728,
      "learning_rate": 1.3548466559921173e-06,
      "loss": 0.1554,
      "step": 8069
    },
    {
      "epoch": 2.980059084194978,
      "grad_norm": 0.3034195303916931,
      "learning_rate": 1.3302130804286243e-06,
      "loss": 0.168,
      "step": 8070
    },
    {
      "epoch": 2.9804283604135895,
      "grad_norm": 0.3023551106452942,
      "learning_rate": 1.3055795048651313e-06,
      "loss": 0.1526,
      "step": 8071
    },
    {
      "epoch": 2.980797636632201,
      "grad_norm": 0.2257782369852066,
      "learning_rate": 1.2809459293016381e-06,
      "loss": 0.1723,
      "step": 8072
    },
    {
      "epoch": 2.9811669128508123,
      "grad_norm": 0.3689556419849396,
      "learning_rate": 1.2563123537381451e-06,
      "loss": 0.1608,
      "step": 8073
    },
    {
      "epoch": 2.981536189069424,
      "grad_norm": 0.2841677963733673,
      "learning_rate": 1.2316787781746521e-06,
      "loss": 0.1873,
      "step": 8074
    },
    {
      "epoch": 2.9819054652880355,
      "grad_norm": 0.2721441984176636,
      "learning_rate": 1.2070452026111592e-06,
      "loss": 0.1349,
      "step": 8075
    },
    {
      "epoch": 2.982274741506647,
      "grad_norm": 0.23719848692417145,
      "learning_rate": 1.182411627047666e-06,
      "loss": 0.1715,
      "step": 8076
    },
    {
      "epoch": 2.9826440177252582,
      "grad_norm": 0.32348963618278503,
      "learning_rate": 1.157778051484173e-06,
      "loss": 0.1813,
      "step": 8077
    },
    {
      "epoch": 2.98301329394387,
      "grad_norm": 0.2904803454875946,
      "learning_rate": 1.13314447592068e-06,
      "loss": 0.1822,
      "step": 8078
    },
    {
      "epoch": 2.9833825701624814,
      "grad_norm": 0.27401435375213623,
      "learning_rate": 1.1085109003571868e-06,
      "loss": 0.1683,
      "step": 8079
    },
    {
      "epoch": 2.983751846381093,
      "grad_norm": 0.3362328112125397,
      "learning_rate": 1.0838773247936938e-06,
      "loss": 0.2015,
      "step": 8080
    },
    {
      "epoch": 2.9841211225997046,
      "grad_norm": 0.2640010416507721,
      "learning_rate": 1.0592437492302008e-06,
      "loss": 0.1428,
      "step": 8081
    },
    {
      "epoch": 2.9844903988183162,
      "grad_norm": 0.293450266122818,
      "learning_rate": 1.0346101736667079e-06,
      "loss": 0.1591,
      "step": 8082
    },
    {
      "epoch": 2.984859675036928,
      "grad_norm": 0.23264862596988678,
      "learning_rate": 1.0099765981032147e-06,
      "loss": 0.1231,
      "step": 8083
    },
    {
      "epoch": 2.985228951255539,
      "grad_norm": 0.2444784939289093,
      "learning_rate": 9.853430225397217e-07,
      "loss": 0.145,
      "step": 8084
    },
    {
      "epoch": 2.9855982274741506,
      "grad_norm": 0.39302757382392883,
      "learning_rate": 9.607094469762287e-07,
      "loss": 0.1942,
      "step": 8085
    },
    {
      "epoch": 2.985967503692762,
      "grad_norm": 0.23764002323150635,
      "learning_rate": 9.360758714127355e-07,
      "loss": 0.1602,
      "step": 8086
    },
    {
      "epoch": 2.986336779911374,
      "grad_norm": 0.2889630198478699,
      "learning_rate": 9.114422958492425e-07,
      "loss": 0.1732,
      "step": 8087
    },
    {
      "epoch": 2.986706056129985,
      "grad_norm": 0.32426342368125916,
      "learning_rate": 8.868087202857495e-07,
      "loss": 0.1864,
      "step": 8088
    },
    {
      "epoch": 2.9870753323485966,
      "grad_norm": 0.2573675513267517,
      "learning_rate": 8.621751447222565e-07,
      "loss": 0.1499,
      "step": 8089
    },
    {
      "epoch": 2.987444608567208,
      "grad_norm": 0.2688586711883545,
      "learning_rate": 8.375415691587634e-07,
      "loss": 0.147,
      "step": 8090
    },
    {
      "epoch": 2.9878138847858198,
      "grad_norm": 0.27188539505004883,
      "learning_rate": 8.129079935952704e-07,
      "loss": 0.1597,
      "step": 8091
    },
    {
      "epoch": 2.9881831610044314,
      "grad_norm": 0.2932220697402954,
      "learning_rate": 7.882744180317774e-07,
      "loss": 0.1718,
      "step": 8092
    },
    {
      "epoch": 2.988552437223043,
      "grad_norm": 0.30258461833000183,
      "learning_rate": 7.636408424682843e-07,
      "loss": 0.1796,
      "step": 8093
    },
    {
      "epoch": 2.9889217134416546,
      "grad_norm": 0.24038194119930267,
      "learning_rate": 7.390072669047913e-07,
      "loss": 0.132,
      "step": 8094
    },
    {
      "epoch": 2.9892909896602657,
      "grad_norm": 0.2573639750480652,
      "learning_rate": 7.143736913412982e-07,
      "loss": 0.1578,
      "step": 8095
    },
    {
      "epoch": 2.9896602658788773,
      "grad_norm": 0.3152335286140442,
      "learning_rate": 6.897401157778051e-07,
      "loss": 0.1636,
      "step": 8096
    },
    {
      "epoch": 2.990029542097489,
      "grad_norm": 0.22796498239040375,
      "learning_rate": 6.651065402143121e-07,
      "loss": 0.1319,
      "step": 8097
    },
    {
      "epoch": 2.9903988183161005,
      "grad_norm": 0.24346081912517548,
      "learning_rate": 6.404729646508191e-07,
      "loss": 0.1606,
      "step": 8098
    },
    {
      "epoch": 2.9907680945347117,
      "grad_norm": 0.27753716707229614,
      "learning_rate": 6.158393890873261e-07,
      "loss": 0.1547,
      "step": 8099
    },
    {
      "epoch": 2.9911373707533233,
      "grad_norm": 0.24621686339378357,
      "learning_rate": 5.91205813523833e-07,
      "loss": 0.1403,
      "step": 8100
    },
    {
      "epoch": 2.9911373707533233,
      "eval_loss": 0.24522972106933594,
      "eval_runtime": 5.8569,
      "eval_samples_per_second": 8.537,
      "eval_steps_per_second": 1.195,
      "step": 8100
    },
    {
      "epoch": 2.991506646971935,
      "grad_norm": 0.2303771823644638,
      "learning_rate": 5.6657223796034e-07,
      "loss": 0.1515,
      "step": 8101
    },
    {
      "epoch": 2.9918759231905465,
      "grad_norm": 0.2339046150445938,
      "learning_rate": 5.419386623968469e-07,
      "loss": 0.1266,
      "step": 8102
    },
    {
      "epoch": 2.992245199409158,
      "grad_norm": 0.32565292716026306,
      "learning_rate": 5.173050868333539e-07,
      "loss": 0.1642,
      "step": 8103
    },
    {
      "epoch": 2.9926144756277697,
      "grad_norm": 0.2603965997695923,
      "learning_rate": 4.926715112698608e-07,
      "loss": 0.1598,
      "step": 8104
    },
    {
      "epoch": 2.9929837518463813,
      "grad_norm": 0.2606295049190521,
      "learning_rate": 4.6803793570636775e-07,
      "loss": 0.1962,
      "step": 8105
    },
    {
      "epoch": 2.9933530280649925,
      "grad_norm": 0.2298162877559662,
      "learning_rate": 4.4340436014287476e-07,
      "loss": 0.1472,
      "step": 8106
    },
    {
      "epoch": 2.993722304283604,
      "grad_norm": 0.28554925322532654,
      "learning_rate": 4.187707845793817e-07,
      "loss": 0.1993,
      "step": 8107
    },
    {
      "epoch": 2.9940915805022157,
      "grad_norm": 0.2721603214740753,
      "learning_rate": 3.941372090158887e-07,
      "loss": 0.1705,
      "step": 8108
    },
    {
      "epoch": 2.9944608567208273,
      "grad_norm": 0.2951487600803375,
      "learning_rate": 3.6950363345239565e-07,
      "loss": 0.18,
      "step": 8109
    },
    {
      "epoch": 2.9948301329394384,
      "grad_norm": 0.27289626002311707,
      "learning_rate": 3.4487005788890257e-07,
      "loss": 0.1754,
      "step": 8110
    },
    {
      "epoch": 2.99519940915805,
      "grad_norm": 0.29108723998069763,
      "learning_rate": 3.2023648232540953e-07,
      "loss": 0.1602,
      "step": 8111
    },
    {
      "epoch": 2.9955686853766617,
      "grad_norm": 0.2703995704650879,
      "learning_rate": 2.956029067619165e-07,
      "loss": 0.1659,
      "step": 8112
    },
    {
      "epoch": 2.9959379615952733,
      "grad_norm": 0.23298288881778717,
      "learning_rate": 2.7096933119842346e-07,
      "loss": 0.1263,
      "step": 8113
    },
    {
      "epoch": 2.996307237813885,
      "grad_norm": 0.20616547763347626,
      "learning_rate": 2.463357556349304e-07,
      "loss": 0.1347,
      "step": 8114
    },
    {
      "epoch": 2.9966765140324965,
      "grad_norm": 0.2639867067337036,
      "learning_rate": 2.2170218007143738e-07,
      "loss": 0.146,
      "step": 8115
    },
    {
      "epoch": 2.997045790251108,
      "grad_norm": 0.3198007047176361,
      "learning_rate": 1.9706860450794435e-07,
      "loss": 0.2015,
      "step": 8116
    },
    {
      "epoch": 2.9974150664697192,
      "grad_norm": 0.24881775677204132,
      "learning_rate": 1.7243502894445128e-07,
      "loss": 0.1598,
      "step": 8117
    },
    {
      "epoch": 2.997784342688331,
      "grad_norm": 0.24045133590698242,
      "learning_rate": 1.4780145338095825e-07,
      "loss": 0.1345,
      "step": 8118
    },
    {
      "epoch": 2.9981536189069424,
      "grad_norm": 0.29275083541870117,
      "learning_rate": 1.231678778174652e-07,
      "loss": 0.1775,
      "step": 8119
    },
    {
      "epoch": 2.998522895125554,
      "grad_norm": 0.33159106969833374,
      "learning_rate": 9.853430225397217e-08,
      "loss": 0.1872,
      "step": 8120
    },
    {
      "epoch": 2.998892171344165,
      "grad_norm": 0.2544606626033783,
      "learning_rate": 7.390072669047912e-08,
      "loss": 0.1503,
      "step": 8121
    },
    {
      "epoch": 2.999261447562777,
      "grad_norm": 0.24590566754341125,
      "learning_rate": 4.9267151126986086e-08,
      "loss": 0.1568,
      "step": 8122
    },
    {
      "epoch": 2.9996307237813884,
      "grad_norm": 0.2536267638206482,
      "learning_rate": 2.4633575563493043e-08,
      "loss": 0.1525,
      "step": 8123
    },
    {
      "epoch": 3.0,
      "grad_norm": 0.32975172996520996,
      "learning_rate": 0.0,
      "loss": 0.1622,
      "step": 8124
    }
  ],
  "logging_steps": 1,
  "max_steps": 8124,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 6.03894823567147e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}