{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.2458760494452179,
  "eval_steps": 365,
  "global_step": 1457,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00016875500991435683,
      "grad_norm": 1.940874608408194e-05,
      "learning_rate": 2e-05,
      "loss": 46.0,
      "step": 1
    },
    {
      "epoch": 0.00016875500991435683,
      "eval_loss": 11.5,
      "eval_runtime": 14.7961,
      "eval_samples_per_second": 168.625,
      "eval_steps_per_second": 84.346,
      "step": 1
    },
    {
      "epoch": 0.00033751001982871366,
      "grad_norm": 1.7339452824671753e-05,
      "learning_rate": 4e-05,
      "loss": 46.0,
      "step": 2
    },
    {
      "epoch": 0.0005062650297430705,
      "grad_norm": 9.871354450297076e-06,
      "learning_rate": 6e-05,
      "loss": 46.0,
      "step": 3
    },
    {
      "epoch": 0.0006750200396574273,
      "grad_norm": 1.9611639800132252e-05,
      "learning_rate": 8e-05,
      "loss": 46.0,
      "step": 4
    },
    {
      "epoch": 0.0008437750495717841,
      "grad_norm": 1.9497307221172377e-05,
      "learning_rate": 0.0001,
      "loss": 46.0,
      "step": 5
    },
    {
      "epoch": 0.001012530059486141,
      "grad_norm": 1.4163069863570854e-05,
      "learning_rate": 0.00012,
      "loss": 46.0,
      "step": 6
    },
    {
      "epoch": 0.0011812850694004977,
      "grad_norm": 2.7470567147247493e-05,
      "learning_rate": 0.00014,
      "loss": 46.0,
      "step": 7
    },
    {
      "epoch": 0.0013500400793148546,
      "grad_norm": 1.262454861716833e-05,
      "learning_rate": 0.00016,
      "loss": 46.0,
      "step": 8
    },
    {
      "epoch": 0.0015187950892292116,
      "grad_norm": 1.2461353435355704e-05,
      "learning_rate": 0.00018,
      "loss": 46.0,
      "step": 9
    },
    {
      "epoch": 0.0016875500991435683,
      "grad_norm": 1.924686148413457e-05,
      "learning_rate": 0.0002,
      "loss": 46.0,
      "step": 10
    },
    {
      "epoch": 0.0018563051090579252,
      "grad_norm": 1.5145715224207379e-05,
      "learning_rate": 0.0001999997643146886,
      "loss": 46.0,
      "step": 11
    },
    {
      "epoch": 0.002025060118972282,
      "grad_norm": 8.845885531627573e-06,
      "learning_rate": 0.0001999990572598653,
      "loss": 46.0,
      "step": 12
    },
    {
      "epoch": 0.0021938151288866388,
      "grad_norm": 1.8190678019891493e-05,
      "learning_rate": 0.00019999787883886297,
      "loss": 46.0,
      "step": 13
    },
    {
      "epoch": 0.0023625701388009955,
      "grad_norm": 1.827279083954636e-05,
      "learning_rate": 0.00019999622905723634,
      "loss": 46.0,
      "step": 14
    },
    {
      "epoch": 0.0025313251487153526,
      "grad_norm": 1.6565853002248332e-05,
      "learning_rate": 0.00019999410792276198,
      "loss": 46.0,
      "step": 15
    },
    {
      "epoch": 0.0027000801586297093,
      "grad_norm": 1.0454502444190439e-05,
      "learning_rate": 0.00019999151544543832,
      "loss": 46.0,
      "step": 16
    },
    {
      "epoch": 0.002868835168544066,
      "grad_norm": 1.3837036021868698e-05,
      "learning_rate": 0.00019998845163748553,
      "loss": 46.0,
      "step": 17
    },
    {
      "epoch": 0.003037590178458423,
      "grad_norm": 1.835626426327508e-05,
      "learning_rate": 0.0001999849165133455,
      "loss": 46.0,
      "step": 18
    },
    {
      "epoch": 0.00320634518837278,
      "grad_norm": 1.9169588995282538e-05,
      "learning_rate": 0.00019998091008968175,
      "loss": 46.0,
      "step": 19
    },
    {
      "epoch": 0.0033751001982871365,
      "grad_norm": 9.931905879057012e-06,
      "learning_rate": 0.0001999764323853794,
      "loss": 46.0,
      "step": 20
    },
    {
      "epoch": 0.0035438552082014936,
      "grad_norm": 1.246378269570414e-05,
      "learning_rate": 0.00019997148342154502,
      "loss": 46.0,
      "step": 21
    },
    {
      "epoch": 0.0037126102181158503,
      "grad_norm": 1.627793062652927e-05,
      "learning_rate": 0.0001999660632215066,
      "loss": 46.0,
      "step": 22
    },
    {
      "epoch": 0.003881365228030207,
      "grad_norm": 1.6465271983179264e-05,
      "learning_rate": 0.00019996017181081336,
      "loss": 46.0,
      "step": 23
    },
    {
      "epoch": 0.004050120237944564,
      "grad_norm": 1.4562248907168396e-05,
      "learning_rate": 0.00019995380921723562,
      "loss": 46.0,
      "step": 24
    },
    {
      "epoch": 0.00421887524785892,
      "grad_norm": 1.0674185432435479e-05,
      "learning_rate": 0.00019994697547076487,
      "loss": 46.0,
      "step": 25
    },
    {
      "epoch": 0.0043876302577732776,
      "grad_norm": 1.2923982467327733e-05,
      "learning_rate": 0.00019993967060361335,
      "loss": 46.0,
      "step": 26
    },
    {
      "epoch": 0.004556385267687635,
      "grad_norm": 1.9432607587077655e-05,
      "learning_rate": 0.00019993189465021405,
      "loss": 46.0,
      "step": 27
    },
    {
      "epoch": 0.004725140277601991,
      "grad_norm": 1.4471233043877874e-05,
      "learning_rate": 0.0001999236476472205,
      "loss": 46.0,
      "step": 28
    },
    {
      "epoch": 0.004893895287516348,
      "grad_norm": 2.0513718482106924e-05,
      "learning_rate": 0.0001999149296335067,
      "loss": 46.0,
      "step": 29
    },
    {
      "epoch": 0.005062650297430705,
      "grad_norm": 2.3202137526823208e-05,
      "learning_rate": 0.00019990574065016677,
      "loss": 46.0,
      "step": 30
    },
    {
      "epoch": 0.0052314053073450615,
      "grad_norm": 2.1836229279870167e-05,
      "learning_rate": 0.00019989608074051489,
      "loss": 46.0,
      "step": 31
    },
    {
      "epoch": 0.005400160317259419,
      "grad_norm": 1.5274166798917577e-05,
      "learning_rate": 0.00019988594995008505,
      "loss": 46.0,
      "step": 32
    },
    {
      "epoch": 0.005568915327173776,
      "grad_norm": 1.919052010634914e-05,
      "learning_rate": 0.00019987534832663082,
      "loss": 46.0,
      "step": 33
    },
    {
      "epoch": 0.005737670337088132,
      "grad_norm": 1.5861112842685543e-05,
      "learning_rate": 0.0001998642759201251,
      "loss": 46.0,
      "step": 34
    },
    {
      "epoch": 0.005906425347002489,
      "grad_norm": 1.185925975732971e-05,
      "learning_rate": 0.00019985273278276,
      "loss": 46.0,
      "step": 35
    },
    {
      "epoch": 0.006075180356916846,
      "grad_norm": 1.6194346244446933e-05,
      "learning_rate": 0.00019984071896894646,
      "loss": 46.0,
      "step": 36
    },
    {
      "epoch": 0.0062439353668312025,
      "grad_norm": 1.961121779459063e-05,
      "learning_rate": 0.0001998282345353141,
      "loss": 46.0,
      "step": 37
    },
    {
      "epoch": 0.00641269037674556,
      "grad_norm": 2.1559202650678344e-05,
      "learning_rate": 0.0001998152795407108,
      "loss": 46.0,
      "step": 38
    },
    {
      "epoch": 0.006581445386659917,
      "grad_norm": 2.8157497581560165e-05,
      "learning_rate": 0.00019980185404620268,
      "loss": 46.0,
      "step": 39
    },
    {
      "epoch": 0.006750200396574273,
      "grad_norm": 2.1486024706973694e-05,
      "learning_rate": 0.00019978795811507354,
      "loss": 46.0,
      "step": 40
    },
    {
      "epoch": 0.00691895540648863,
      "grad_norm": 2.3358212274615653e-05,
      "learning_rate": 0.00019977359181282473,
      "loss": 46.0,
      "step": 41
    },
    {
      "epoch": 0.007087710416402987,
      "grad_norm": 1.6756239347159863e-05,
      "learning_rate": 0.00019975875520717479,
      "loss": 46.0,
      "step": 42
    },
    {
      "epoch": 0.0072564654263173435,
      "grad_norm": 1.7175016182591207e-05,
      "learning_rate": 0.00019974344836805905,
      "loss": 46.0,
      "step": 43
    },
    {
      "epoch": 0.007425220436231701,
      "grad_norm": 1.2200899618619587e-05,
      "learning_rate": 0.00019972767136762953,
      "loss": 46.0,
      "step": 44
    },
    {
      "epoch": 0.007593975446146058,
      "grad_norm": 1.7348913388559595e-05,
      "learning_rate": 0.00019971142428025433,
      "loss": 46.0,
      "step": 45
    },
    {
      "epoch": 0.007762730456060414,
      "grad_norm": 9.565149412082974e-06,
      "learning_rate": 0.00019969470718251748,
      "loss": 46.0,
      "step": 46
    },
    {
      "epoch": 0.007931485465974771,
      "grad_norm": 1.5444033124367706e-05,
      "learning_rate": 0.00019967752015321845,
      "loss": 46.0,
      "step": 47
    },
    {
      "epoch": 0.008100240475889128,
      "grad_norm": 1.9607326976256445e-05,
      "learning_rate": 0.00019965986327337185,
      "loss": 46.0,
      "step": 48
    },
    {
      "epoch": 0.008268995485803485,
      "grad_norm": 1.6280893760267645e-05,
      "learning_rate": 0.00019964173662620702,
      "loss": 46.0,
      "step": 49
    },
    {
      "epoch": 0.00843775049571784,
      "grad_norm": 2.2253505449043587e-05,
      "learning_rate": 0.00019962314029716766,
      "loss": 46.0,
      "step": 50
    },
    {
      "epoch": 0.008606505505632198,
      "grad_norm": 1.3872278032067697e-05,
      "learning_rate": 0.0001996040743739114,
      "loss": 46.0,
      "step": 51
    },
    {
      "epoch": 0.008775260515546555,
      "grad_norm": 3.644825119408779e-05,
      "learning_rate": 0.0001995845389463094,
      "loss": 46.0,
      "step": 52
    },
    {
      "epoch": 0.008944015525460912,
      "grad_norm": 1.858348332461901e-05,
      "learning_rate": 0.00019956453410644592,
      "loss": 46.0,
      "step": 53
    },
    {
      "epoch": 0.00911277053537527,
      "grad_norm": 1.7900563761941157e-05,
      "learning_rate": 0.0001995440599486179,
      "loss": 46.0,
      "step": 54
    },
    {
      "epoch": 0.009281525545289626,
      "grad_norm": 4.377702498459257e-05,
      "learning_rate": 0.0001995231165693345,
      "loss": 46.0,
      "step": 55
    },
    {
      "epoch": 0.009450280555203982,
      "grad_norm": 2.3857590349507518e-05,
      "learning_rate": 0.00019950170406731667,
      "loss": 46.0,
      "step": 56
    },
    {
      "epoch": 0.009619035565118339,
      "grad_norm": 1.2674429854087066e-05,
      "learning_rate": 0.00019947982254349666,
      "loss": 46.0,
      "step": 57
    },
    {
      "epoch": 0.009787790575032696,
      "grad_norm": 2.890539326472208e-05,
      "learning_rate": 0.00019945747210101754,
      "loss": 46.0,
      "step": 58
    },
    {
      "epoch": 0.009956545584947053,
      "grad_norm": 1.9787186829489656e-05,
      "learning_rate": 0.0001994346528452327,
      "loss": 46.0,
      "step": 59
    },
    {
      "epoch": 0.01012530059486141,
      "grad_norm": 2.427671461191494e-05,
      "learning_rate": 0.00019941136488370542,
      "loss": 46.0,
      "step": 60
    },
    {
      "epoch": 0.010294055604775768,
      "grad_norm": 1.2431184586603194e-05,
      "learning_rate": 0.00019938760832620834,
      "loss": 46.0,
      "step": 61
    },
    {
      "epoch": 0.010462810614690123,
      "grad_norm": 2.0812987713725306e-05,
      "learning_rate": 0.00019936338328472287,
      "loss": 46.0,
      "step": 62
    },
    {
      "epoch": 0.01063156562460448,
      "grad_norm": 2.8024591301800683e-05,
      "learning_rate": 0.00019933868987343875,
      "loss": 46.0,
      "step": 63
    },
    {
      "epoch": 0.010800320634518837,
      "grad_norm": 2.2569249267689884e-05,
      "learning_rate": 0.0001993135282087535,
      "loss": 46.0,
      "step": 64
    },
    {
      "epoch": 0.010969075644433194,
      "grad_norm": 1.9383338440093212e-05,
      "learning_rate": 0.0001992878984092717,
      "loss": 46.0,
      "step": 65
    },
    {
      "epoch": 0.011137830654347551,
      "grad_norm": 2.542112815717701e-05,
      "learning_rate": 0.00019926180059580482,
      "loss": 46.0,
      "step": 66
    },
    {
      "epoch": 0.011306585664261909,
      "grad_norm": 1.598012568138074e-05,
      "learning_rate": 0.00019923523489137024,
      "loss": 46.0,
      "step": 67
    },
    {
      "epoch": 0.011475340674176264,
      "grad_norm": 1.4789104170631617e-05,
      "learning_rate": 0.00019920820142119085,
      "loss": 46.0,
      "step": 68
    },
    {
      "epoch": 0.011644095684090621,
      "grad_norm": 1.9799528672592714e-05,
      "learning_rate": 0.00019918070031269453,
      "loss": 46.0,
      "step": 69
    },
    {
      "epoch": 0.011812850694004978,
      "grad_norm": 3.249241126468405e-05,
      "learning_rate": 0.00019915273169551342,
      "loss": 46.0,
      "step": 70
    },
    {
      "epoch": 0.011981605703919335,
      "grad_norm": 2.287813367729541e-05,
      "learning_rate": 0.00019912429570148339,
      "loss": 46.0,
      "step": 71
    },
    {
      "epoch": 0.012150360713833692,
      "grad_norm": 3.186216781614348e-05,
      "learning_rate": 0.0001990953924646433,
      "loss": 46.0,
      "step": 72
    },
    {
      "epoch": 0.01231911572374805,
      "grad_norm": 4.087354682269506e-05,
      "learning_rate": 0.00019906602212123455,
      "loss": 46.0,
      "step": 73
    },
    {
      "epoch": 0.012487870733662405,
      "grad_norm": 2.5896191800711676e-05,
      "learning_rate": 0.00019903618480970035,
      "loss": 46.0,
      "step": 74
    },
    {
      "epoch": 0.012656625743576762,
      "grad_norm": 2.6261466700816527e-05,
      "learning_rate": 0.00019900588067068493,
      "loss": 46.0,
      "step": 75
    },
    {
      "epoch": 0.01282538075349112,
      "grad_norm": 1.815898940549232e-05,
      "learning_rate": 0.0001989751098470332,
      "loss": 46.0,
      "step": 76
    },
    {
      "epoch": 0.012994135763405476,
      "grad_norm": 1.682456240814645e-05,
      "learning_rate": 0.0001989438724837897,
      "loss": 46.0,
      "step": 77
    },
    {
      "epoch": 0.013162890773319834,
      "grad_norm": 2.581811168056447e-05,
      "learning_rate": 0.00019891216872819825,
      "loss": 46.0,
      "step": 78
    },
    {
      "epoch": 0.013331645783234189,
      "grad_norm": 3.8674785173498094e-05,
      "learning_rate": 0.00019887999872970097,
      "loss": 46.0,
      "step": 79
    },
    {
      "epoch": 0.013500400793148546,
      "grad_norm": 3.877016933984123e-05,
      "learning_rate": 0.00019884736263993784,
      "loss": 46.0,
      "step": 80
    },
    {
      "epoch": 0.013669155803062903,
      "grad_norm": 1.5791521946084686e-05,
      "learning_rate": 0.0001988142606127458,
      "loss": 46.0,
      "step": 81
    },
    {
      "epoch": 0.01383791081297726,
      "grad_norm": 2.4946857593022287e-05,
      "learning_rate": 0.00019878069280415803,
      "loss": 46.0,
      "step": 82
    },
    {
      "epoch": 0.014006665822891617,
      "grad_norm": 4.878333129454404e-05,
      "learning_rate": 0.00019874665937240335,
      "loss": 46.0,
      "step": 83
    },
    {
      "epoch": 0.014175420832805975,
      "grad_norm": 2.5513558284728788e-05,
      "learning_rate": 0.00019871216047790538,
      "loss": 46.0,
      "step": 84
    },
    {
      "epoch": 0.01434417584272033,
      "grad_norm": 5.085681550554e-05,
      "learning_rate": 0.00019867719628328175,
      "loss": 46.0,
      "step": 85
    },
    {
      "epoch": 0.014512930852634687,
      "grad_norm": 4.014965088572353e-05,
      "learning_rate": 0.0001986417669533434,
      "loss": 46.0,
      "step": 86
    },
    {
      "epoch": 0.014681685862549044,
      "grad_norm": 2.0565448721754365e-05,
      "learning_rate": 0.0001986058726550938,
      "loss": 46.0,
      "step": 87
    },
    {
      "epoch": 0.014850440872463401,
      "grad_norm": 4.422978236107156e-05,
      "learning_rate": 0.00019856951355772814,
      "loss": 46.0,
      "step": 88
    },
    {
      "epoch": 0.015019195882377758,
      "grad_norm": 2.699590550037101e-05,
      "learning_rate": 0.00019853268983263244,
      "loss": 46.0,
      "step": 89
    },
    {
      "epoch": 0.015187950892292116,
      "grad_norm": 1.274027908948483e-05,
      "learning_rate": 0.000198495401653383,
      "loss": 46.0,
      "step": 90
    },
    {
      "epoch": 0.015356705902206471,
      "grad_norm": 4.096307384315878e-05,
      "learning_rate": 0.00019845764919574537,
      "loss": 46.0,
      "step": 91
    },
    {
      "epoch": 0.015525460912120828,
      "grad_norm": 3.1611998565495014e-05,
      "learning_rate": 0.00019841943263767346,
      "loss": 46.0,
      "step": 92
    },
    {
      "epoch": 0.015694215922035185,
      "grad_norm": 2.0112831407459453e-05,
      "learning_rate": 0.00019838075215930894,
      "loss": 46.0,
      "step": 93
    },
    {
      "epoch": 0.015862970931949542,
      "grad_norm": 6.471107190009207e-05,
      "learning_rate": 0.00019834160794298024,
      "loss": 46.0,
      "step": 94
    },
    {
      "epoch": 0.0160317259418639,
      "grad_norm": 3.331500920467079e-05,
      "learning_rate": 0.00019830200017320168,
      "loss": 46.0,
      "step": 95
    },
    {
      "epoch": 0.016200480951778257,
      "grad_norm": 3.4283220884390175e-05,
      "learning_rate": 0.0001982619290366726,
      "loss": 46.0,
      "step": 96
    },
    {
      "epoch": 0.016369235961692614,
      "grad_norm": 4.784906195709482e-05,
      "learning_rate": 0.00019822139472227665,
      "loss": 46.0,
      "step": 97
    },
    {
      "epoch": 0.01653799097160697,
      "grad_norm": 4.9155318265547976e-05,
      "learning_rate": 0.00019818039742108064,
      "loss": 46.0,
      "step": 98
    },
    {
      "epoch": 0.016706745981521328,
      "grad_norm": 4.837827509618364e-05,
      "learning_rate": 0.00019813893732633378,
      "loss": 46.0,
      "step": 99
    },
    {
      "epoch": 0.01687550099143568,
      "grad_norm": 4.3958363676210865e-05,
      "learning_rate": 0.00019809701463346683,
      "loss": 46.0,
      "step": 100
    },
    {
      "epoch": 0.01704425600135004,
      "grad_norm": 5.13470804435201e-05,
      "learning_rate": 0.000198054629540091,
      "loss": 46.0,
      "step": 101
    },
    {
      "epoch": 0.017213011011264396,
      "grad_norm": 3.207432382623665e-05,
      "learning_rate": 0.00019801178224599722,
      "loss": 46.0,
      "step": 102
    },
    {
      "epoch": 0.017381766021178753,
      "grad_norm": 3.24075881508179e-05,
      "learning_rate": 0.00019796847295315502,
      "loss": 46.0,
      "step": 103
    },
    {
      "epoch": 0.01755052103109311,
      "grad_norm": 5.3687395848101005e-05,
      "learning_rate": 0.00019792470186571167,
      "loss": 46.0,
      "step": 104
    },
    {
      "epoch": 0.017719276041007467,
      "grad_norm": 2.9774340873700567e-05,
      "learning_rate": 0.00019788046918999122,
      "loss": 46.0,
      "step": 105
    },
    {
      "epoch": 0.017888031050921824,
      "grad_norm": 7.565080159110948e-05,
      "learning_rate": 0.00019783577513449353,
      "loss": 46.0,
      "step": 106
    },
    {
      "epoch": 0.01805678606083618,
      "grad_norm": 5.2741965191671625e-05,
      "learning_rate": 0.0001977906199098932,
      "loss": 46.0,
      "step": 107
    },
    {
      "epoch": 0.01822554107075054,
      "grad_norm": 3.216122786398046e-05,
      "learning_rate": 0.0001977450037290388,
      "loss": 46.0,
      "step": 108
    },
    {
      "epoch": 0.018394296080664896,
      "grad_norm": 5.457305451272987e-05,
      "learning_rate": 0.00019769892680695147,
      "loss": 46.0,
      "step": 109
    },
    {
      "epoch": 0.018563051090579253,
      "grad_norm": 3.489471419015899e-05,
      "learning_rate": 0.00019765238936082438,
      "loss": 46.0,
      "step": 110
    },
    {
      "epoch": 0.01873180610049361,
      "grad_norm": 1.886937752715312e-05,
      "learning_rate": 0.00019760539161002135,
      "loss": 46.0,
      "step": 111
    },
    {
      "epoch": 0.018900561110407964,
      "grad_norm": 3.381875285413116e-05,
      "learning_rate": 0.00019755793377607597,
      "loss": 46.0,
      "step": 112
    },
    {
      "epoch": 0.01906931612032232,
      "grad_norm": 3.3628173696342856e-05,
      "learning_rate": 0.00019751001608269052,
      "loss": 46.0,
      "step": 113
    },
    {
      "epoch": 0.019238071130236678,
      "grad_norm": 5.391196464188397e-05,
      "learning_rate": 0.00019746163875573492,
      "loss": 46.0,
      "step": 114
    },
    {
      "epoch": 0.019406826140151035,
      "grad_norm": 4.308508141548373e-05,
      "learning_rate": 0.0001974128020232457,
      "loss": 46.0,
      "step": 115
    },
    {
      "epoch": 0.019575581150065392,
      "grad_norm": 0.00010606838623061776,
      "learning_rate": 0.00019736350611542487,
      "loss": 46.0,
      "step": 116
    },
    {
      "epoch": 0.01974433615997975,
      "grad_norm": 4.744268153444864e-05,
      "learning_rate": 0.00019731375126463886,
      "loss": 46.0,
      "step": 117
    },
    {
      "epoch": 0.019913091169894107,
      "grad_norm": 3.2672236557118595e-05,
      "learning_rate": 0.00019726353770541742,
      "loss": 46.0,
      "step": 118
    },
    {
      "epoch": 0.020081846179808464,
      "grad_norm": 1.8585633370094e-05,
      "learning_rate": 0.0001972128656744525,
      "loss": 46.0,
      "step": 119
    },
    {
      "epoch": 0.02025060118972282,
      "grad_norm": 3.3655844163149595e-05,
      "learning_rate": 0.0001971617354105972,
      "loss": 46.0,
      "step": 120
    },
    {
      "epoch": 0.020419356199637178,
      "grad_norm": 4.9799295084085315e-05,
      "learning_rate": 0.00019711014715486448,
      "loss": 46.0,
      "step": 121
    },
    {
      "epoch": 0.020588111209551535,
      "grad_norm": 7.914419256849214e-05,
      "learning_rate": 0.00019705810115042634,
      "loss": 46.0,
      "step": 122
    },
    {
      "epoch": 0.02075686621946589,
      "grad_norm": 4.802920011570677e-05,
      "learning_rate": 0.00019700559764261225,
      "loss": 46.0,
      "step": 123
    },
    {
      "epoch": 0.020925621229380246,
      "grad_norm": 3.76962598238606e-05,
      "learning_rate": 0.0001969526368789084,
      "loss": 46.0,
      "step": 124
    },
    {
      "epoch": 0.021094376239294603,
      "grad_norm": 3.57206336047966e-05,
      "learning_rate": 0.00019689921910895627,
      "loss": 46.0,
      "step": 125
    },
    {
      "epoch": 0.02126313124920896,
      "grad_norm": 0.0001358168519800529,
      "learning_rate": 0.00019684534458455145,
      "loss": 46.0,
      "step": 126
    },
    {
      "epoch": 0.021431886259123317,
      "grad_norm": 3.319705865578726e-05,
      "learning_rate": 0.0001967910135596427,
      "loss": 46.0,
      "step": 127
    },
    {
      "epoch": 0.021600641269037674,
      "grad_norm": 9.154703002423048e-05,
      "learning_rate": 0.0001967362262903305,
      "loss": 46.0,
      "step": 128
    },
    {
      "epoch": 0.02176939627895203,
      "grad_norm": 0.00012708237045444548,
      "learning_rate": 0.00019668098303486593,
      "loss": 46.0,
      "step": 129
    },
    {
      "epoch": 0.02193815128886639,
      "grad_norm": 5.1937749958597124e-05,
      "learning_rate": 0.00019662528405364947,
      "loss": 46.0,
      "step": 130
    },
    {
      "epoch": 0.022106906298780746,
      "grad_norm": 6.14839227637276e-05,
      "learning_rate": 0.00019656912960922974,
      "loss": 46.0,
      "step": 131
    },
    {
      "epoch": 0.022275661308695103,
      "grad_norm": 5.0448226829757914e-05,
      "learning_rate": 0.0001965125199663023,
      "loss": 46.0,
      "step": 132
    },
    {
      "epoch": 0.02244441631860946,
      "grad_norm": 0.00013035547453910112,
      "learning_rate": 0.0001964554553917084,
      "loss": 46.0,
      "step": 133
    },
    {
      "epoch": 0.022613171328523817,
      "grad_norm": 5.22616392117925e-05,
      "learning_rate": 0.00019639793615443366,
      "loss": 46.0,
      "step": 134
    },
    {
      "epoch": 0.02278192633843817,
      "grad_norm": 7.795252167852595e-05,
      "learning_rate": 0.00019633996252560687,
      "loss": 46.0,
      "step": 135
    },
    {
      "epoch": 0.022950681348352528,
      "grad_norm": 0.0001024070952553302,
      "learning_rate": 0.00019628153477849867,
      "loss": 46.0,
      "step": 136
    },
    {
      "epoch": 0.023119436358266885,
      "grad_norm": 6.724517152179033e-05,
      "learning_rate": 0.00019622265318852033,
      "loss": 46.0,
      "step": 137
    },
    {
      "epoch": 0.023288191368181242,
      "grad_norm": 6.319572275970131e-05,
      "learning_rate": 0.00019616331803322236,
      "loss": 46.0,
      "step": 138
    },
    {
      "epoch": 0.0234569463780956,
      "grad_norm": 6.086541907279752e-05,
      "learning_rate": 0.0001961035295922932,
      "loss": 46.0,
      "step": 139
    },
    {
      "epoch": 0.023625701388009956,
      "grad_norm": 5.147139745531604e-05,
      "learning_rate": 0.00019604328814755808,
      "loss": 46.0,
      "step": 140
    },
    {
      "epoch": 0.023794456397924314,
      "grad_norm": 6.334174395306036e-05,
      "learning_rate": 0.0001959825939829774,
      "loss": 46.0,
      "step": 141
    },
    {
      "epoch": 0.02396321140783867,
      "grad_norm": 8.245484787039459e-05,
      "learning_rate": 0.00019592144738464566,
      "loss": 46.0,
      "step": 142
    },
    {
      "epoch": 0.024131966417753028,
      "grad_norm": 3.768013630178757e-05,
      "learning_rate": 0.00019585984864078996,
      "loss": 46.0,
      "step": 143
    },
    {
      "epoch": 0.024300721427667385,
      "grad_norm": 5.31747609784361e-05,
      "learning_rate": 0.0001957977980417687,
      "loss": 46.0,
      "step": 144
    },
    {
      "epoch": 0.024469476437581742,
      "grad_norm": 6.340059917420149e-05,
      "learning_rate": 0.00019573529588007011,
      "loss": 46.0,
      "step": 145
    },
    {
      "epoch": 0.0246382314474961,
      "grad_norm": 8.484098361805081e-05,
      "learning_rate": 0.00019567234245031106,
      "loss": 46.0,
      "step": 146
    },
    {
      "epoch": 0.024806986457410453,
      "grad_norm": 7.689618360018358e-05,
      "learning_rate": 0.00019560893804923554,
      "loss": 46.0,
      "step": 147
    },
    {
      "epoch": 0.02497574146732481,
      "grad_norm": 6.701362144667655e-05,
      "learning_rate": 0.00019554508297571328,
      "loss": 46.0,
      "step": 148
    },
    {
      "epoch": 0.025144496477239167,
      "grad_norm": 4.832363629247993e-05,
      "learning_rate": 0.00019548077753073827,
      "loss": 46.0,
      "step": 149
    },
    {
      "epoch": 0.025313251487153524,
      "grad_norm": 0.00010464687511557713,
      "learning_rate": 0.00019541602201742755,
      "loss": 46.0,
      "step": 150
    },
    {
      "epoch": 0.02548200649706788,
      "grad_norm": 5.5594293371541426e-05,
      "learning_rate": 0.00019535081674101955,
      "loss": 46.0,
      "step": 151
    },
    {
      "epoch": 0.02565076150698224,
      "grad_norm": 0.00010436464071972296,
      "learning_rate": 0.0001952851620088728,
      "loss": 46.0,
      "step": 152
    },
    {
      "epoch": 0.025819516516896596,
      "grad_norm": 7.730885408818722e-05,
      "learning_rate": 0.00019521905813046445,
      "loss": 46.0,
      "step": 153
    },
    {
      "epoch": 0.025988271526810953,
      "grad_norm": 0.0002586382324807346,
      "learning_rate": 0.00019515250541738872,
      "loss": 46.0,
      "step": 154
    },
    {
      "epoch": 0.02615702653672531,
      "grad_norm": 5.2660256187664345e-05,
      "learning_rate": 0.00019508550418335555,
      "loss": 46.0,
      "step": 155
    },
    {
      "epoch": 0.026325781546639667,
      "grad_norm": 6.58825520076789e-05,
      "learning_rate": 0.00019501805474418912,
      "loss": 46.0,
      "step": 156
    },
    {
      "epoch": 0.026494536556554024,
      "grad_norm": 9.116072760662064e-05,
      "learning_rate": 0.00019495015741782622,
      "loss": 46.0,
      "step": 157
    },
    {
      "epoch": 0.026663291566468378,
      "grad_norm": 0.00010203113924944773,
      "learning_rate": 0.00019488181252431489,
      "loss": 46.0,
      "step": 158
    },
    {
      "epoch": 0.026832046576382735,
      "grad_norm": 9.367840539198369e-05,
      "learning_rate": 0.00019481302038581294,
      "loss": 46.0,
      "step": 159
    },
    {
      "epoch": 0.027000801586297092,
      "grad_norm": 5.867075742571615e-05,
      "learning_rate": 0.00019474378132658626,
      "loss": 46.0,
      "step": 160
    },
    {
      "epoch": 0.02716955659621145,
      "grad_norm": 0.0001331541279796511,
      "learning_rate": 0.00019467409567300745,
      "loss": 46.0,
      "step": 161
    },
    {
      "epoch": 0.027338311606125806,
      "grad_norm": 9.494357800576836e-05,
      "learning_rate": 0.0001946039637535542,
      "loss": 46.0,
      "step": 162
    },
    {
      "epoch": 0.027507066616040163,
      "grad_norm": 0.00018060464935842901,
      "learning_rate": 0.0001945333858988078,
      "loss": 46.0,
      "step": 163
    },
    {
      "epoch": 0.02767582162595452,
      "grad_norm": 9.109014354180545e-05,
      "learning_rate": 0.0001944623624414515,
      "loss": 46.0,
      "step": 164
    },
    {
      "epoch": 0.027844576635868878,
      "grad_norm": 0.00021458794071804732,
      "learning_rate": 0.00019439089371626903,
      "loss": 46.0,
      "step": 165
    },
    {
      "epoch": 0.028013331645783235,
      "grad_norm": 0.00023161491844803095,
      "learning_rate": 0.0001943189800601429,
      "loss": 46.0,
      "step": 166
    },
    {
      "epoch": 0.028182086655697592,
      "grad_norm": 0.0001091673257178627,
      "learning_rate": 0.00019424662181205307,
      "loss": 46.0,
      "step": 167
    },
    {
      "epoch": 0.02835084166561195,
      "grad_norm": 9.839528502197936e-05,
      "learning_rate": 0.00019417381931307497,
      "loss": 46.0,
      "step": 168
    },
    {
      "epoch": 0.028519596675526306,
      "grad_norm": 0.0001077549095498398,
      "learning_rate": 0.00019410057290637824,
      "loss": 46.0,
      "step": 169
    },
    {
      "epoch": 0.02868835168544066,
      "grad_norm": 0.00011080451076850295,
      "learning_rate": 0.0001940268829372249,
      "loss": 46.0,
      "step": 170
    },
    {
      "epoch": 0.028857106695355017,
      "grad_norm": 0.00010105837282026187,
      "learning_rate": 0.00019395274975296786,
      "loss": 46.0,
      "step": 171
    },
    {
      "epoch": 0.029025861705269374,
      "grad_norm": 0.00012236724433023483,
      "learning_rate": 0.0001938781737030491,
      "loss": 46.0,
      "step": 172
    },
    {
      "epoch": 0.02919461671518373,
      "grad_norm": 8.416602213401347e-05,
      "learning_rate": 0.00019380315513899826,
      "loss": 46.0,
      "step": 173
    },
    {
      "epoch": 0.02936337172509809,
      "grad_norm": 0.00017547875177115202,
      "learning_rate": 0.00019372769441443083,
      "loss": 46.0,
      "step": 174
    },
    {
      "epoch": 0.029532126735012446,
      "grad_norm": 0.00010037582251243293,
      "learning_rate": 0.00019365179188504647,
      "loss": 46.0,
      "step": 175
    },
    {
      "epoch": 0.029700881744926803,
      "grad_norm": 0.0001204924556077458,
      "learning_rate": 0.0001935754479086274,
      "loss": 46.0,
      "step": 176
    },
    {
      "epoch": 0.02986963675484116,
      "grad_norm": 0.00014140504936221987,
      "learning_rate": 0.00019349866284503674,
      "loss": 46.0,
      "step": 177
    },
    {
      "epoch": 0.030038391764755517,
      "grad_norm": 9.342600969830528e-05,
      "learning_rate": 0.00019342143705621662,
      "loss": 46.0,
      "step": 178
    },
    {
      "epoch": 0.030207146774669874,
      "grad_norm": 4.463369259610772e-05,
      "learning_rate": 0.00019334377090618682,
      "loss": 46.0,
      "step": 179
    },
    {
      "epoch": 0.03037590178458423,
      "grad_norm": 8.116533717839047e-05,
      "learning_rate": 0.00019326566476104274,
      "loss": 46.0,
      "step": 180
    },
    {
      "epoch": 0.03054465679449859,
      "grad_norm": 0.00013790494995191693,
      "learning_rate": 0.00019318711898895377,
      "loss": 46.0,
      "step": 181
    },
    {
      "epoch": 0.030713411804412942,
      "grad_norm": 0.0002199001028202474,
      "learning_rate": 0.00019310813396016162,
      "loss": 46.0,
      "step": 182
    },
    {
      "epoch": 0.0308821668143273,
      "grad_norm": 0.0002289148687850684,
      "learning_rate": 0.0001930287100469785,
      "loss": 46.0,
      "step": 183
    },
    {
      "epoch": 0.031050921824241656,
      "grad_norm": 0.00022609223378822207,
      "learning_rate": 0.00019294884762378547,
      "loss": 46.0,
      "step": 184
    },
    {
      "epoch": 0.031219676834156013,
      "grad_norm": 0.00014787810505367815,
      "learning_rate": 0.00019286854706703044,
      "loss": 46.0,
      "step": 185
    },
    {
      "epoch": 0.03138843184407037,
      "grad_norm": 0.00017034618940670043,
      "learning_rate": 0.00019278780875522667,
      "loss": 46.0,
      "step": 186
    },
    {
      "epoch": 0.03155718685398473,
      "grad_norm": 0.0001577001967234537,
      "learning_rate": 0.0001927066330689509,
      "loss": 46.0,
      "step": 187
    },
    {
      "epoch": 0.031725941863899085,
      "grad_norm": 0.0001635671651456505,
      "learning_rate": 0.0001926250203908414,
      "loss": 46.0,
      "step": 188
    },
    {
      "epoch": 0.03189469687381344,
      "grad_norm": 0.00011218619329156354,
      "learning_rate": 0.00019254297110559638,
      "loss": 46.0,
      "step": 189
    },
    {
      "epoch": 0.0320634518837278,
      "grad_norm": 0.0001787557266652584,
      "learning_rate": 0.0001924604855999721,
      "loss": 46.0,
      "step": 190
    },
    {
      "epoch": 0.03223220689364215,
      "grad_norm": 0.00014260809984989464,
      "learning_rate": 0.00019237756426278095,
      "loss": 46.0,
      "step": 191
    },
    {
      "epoch": 0.03240096190355651,
      "grad_norm": 0.00012893076927866787,
      "learning_rate": 0.00019229420748488978,
      "loss": 46.0,
      "step": 192
    },
    {
      "epoch": 0.03256971691347087,
      "grad_norm": 0.00022735691163688898,
      "learning_rate": 0.00019221041565921796,
      "loss": 46.0,
      "step": 193
    },
    {
      "epoch": 0.03273847192338523,
      "grad_norm": 0.00011990263010375202,
      "learning_rate": 0.0001921261891807355,
      "loss": 46.0,
      "step": 194
    },
    {
      "epoch": 0.03290722693329958,
      "grad_norm": 0.00017182013834826648,
      "learning_rate": 0.00019204152844646134,
      "loss": 46.0,
      "step": 195
    },
    {
      "epoch": 0.03307598194321394,
      "grad_norm": 0.00017154582019429654,
      "learning_rate": 0.00019195643385546126,
      "loss": 46.0,
      "step": 196
    },
    {
      "epoch": 0.033244736953128295,
      "grad_norm": 0.0001479845232097432,
      "learning_rate": 0.00019187090580884622,
      "loss": 46.0,
      "step": 197
    },
    {
      "epoch": 0.033413491963042656,
      "grad_norm": 0.00010758084681583568,
      "learning_rate": 0.00019178494470977023,
      "loss": 46.0,
      "step": 198
    },
    {
      "epoch": 0.03358224697295701,
      "grad_norm": 0.0001167198788607493,
      "learning_rate": 0.0001916985509634287,
      "loss": 46.0,
      "step": 199
    },
    {
      "epoch": 0.03375100198287136,
      "grad_norm": 0.00015376460214611143,
      "learning_rate": 0.00019161172497705637,
      "loss": 46.0,
      "step": 200
    },
    {
      "epoch": 0.033919756992785724,
      "grad_norm": 0.0001339185400865972,
      "learning_rate": 0.00019152446715992543,
      "loss": 46.0,
      "step": 201
    },
    {
      "epoch": 0.03408851200270008,
      "grad_norm": 0.00018876604735851288,
      "learning_rate": 0.0001914367779233436,
      "loss": 46.0,
      "step": 202
    },
    {
      "epoch": 0.03425726701261444,
      "grad_norm": 0.00017353007569909096,
      "learning_rate": 0.00019134865768065216,
      "loss": 46.0,
      "step": 203
    },
    {
      "epoch": 0.03442602202252879,
      "grad_norm": 0.00011807784903794527,
      "learning_rate": 0.00019126010684722406,
      "loss": 46.0,
      "step": 204
    },
    {
      "epoch": 0.03459477703244315,
      "grad_norm": 0.00010566677519818768,
      "learning_rate": 0.00019117112584046193,
      "loss": 46.0,
      "step": 205
    },
    {
      "epoch": 0.034763532042357506,
      "grad_norm": 0.0001312542735831812,
      "learning_rate": 0.00019108171507979606,
      "loss": 46.0,
      "step": 206
    },
    {
      "epoch": 0.03493228705227187,
      "grad_norm": 5.670605969498865e-05,
      "learning_rate": 0.00019099187498668256,
      "loss": 46.0,
      "step": 207
    },
    {
      "epoch": 0.03510104206218622,
      "grad_norm": 8.242291369242594e-05,
      "learning_rate": 0.0001909016059846012,
      "loss": 46.0,
      "step": 208
    },
    {
      "epoch": 0.03526979707210058,
      "grad_norm": 0.0001418525935150683,
      "learning_rate": 0.00019081090849905355,
      "loss": 46.0,
      "step": 209
    },
    {
      "epoch": 0.035438552082014935,
      "grad_norm": 0.0002694391005206853,
      "learning_rate": 0.00019071978295756087,
      "loss": 46.0,
      "step": 210
    },
    {
      "epoch": 0.03560730709192929,
      "grad_norm": 0.00015816248196642846,
      "learning_rate": 0.0001906282297896623,
      "loss": 46.0,
      "step": 211
    },
    {
      "epoch": 0.03577606210184365,
      "grad_norm": 0.00011155927495565265,
      "learning_rate": 0.00019053624942691247,
      "loss": 46.0,
      "step": 212
    },
    {
      "epoch": 0.035944817111758,
      "grad_norm": 0.00010293432569596916,
      "learning_rate": 0.0001904438423028798,
      "loss": 46.0,
      "step": 213
    },
    {
      "epoch": 0.03611357212167236,
      "grad_norm": 0.00017549478798173368,
      "learning_rate": 0.00019035100885314438,
      "loss": 46.0,
      "step": 214
    },
    {
      "epoch": 0.03628232713158672,
      "grad_norm": 8.048818563111126e-05,
      "learning_rate": 0.0001902577495152958,
      "loss": 46.0,
      "step": 215
    },
    {
      "epoch": 0.03645108214150108,
      "grad_norm": 7.043426012387499e-05,
      "learning_rate": 0.0001901640647289312,
      "loss": 46.0,
      "step": 216
    },
    {
      "epoch": 0.03661983715141543,
      "grad_norm": 0.00022185473062563688,
      "learning_rate": 0.00019006995493565305,
      "loss": 46.0,
      "step": 217
    },
    {
      "epoch": 0.03678859216132979,
      "grad_norm": 0.0002446068392600864,
      "learning_rate": 0.0001899754205790674,
      "loss": 46.0,
      "step": 218
    },
    {
      "epoch": 0.036957347171244145,
      "grad_norm": 0.0002539333945605904,
      "learning_rate": 0.00018988046210478132,
      "loss": 46.0,
      "step": 219
    },
    {
      "epoch": 0.037126102181158506,
      "grad_norm": 0.0001403048081556335,
      "learning_rate": 0.00018978507996040124,
      "loss": 46.0,
      "step": 220
    },
    {
      "epoch": 0.03729485719107286,
      "grad_norm": 0.00014873422333039343,
      "learning_rate": 0.00018968927459553055,
      "loss": 46.0,
      "step": 221
    },
    {
      "epoch": 0.03746361220098722,
      "grad_norm": 0.00015955405251588672,
      "learning_rate": 0.00018959304646176754,
      "loss": 46.0,
      "step": 222
    },
    {
      "epoch": 0.037632367210901574,
      "grad_norm": 0.0003200356150045991,
      "learning_rate": 0.00018949639601270347,
      "loss": 46.0,
      "step": 223
    },
    {
      "epoch": 0.03780112222081593,
      "grad_norm": 0.00014675638522021472,
      "learning_rate": 0.00018939932370392004,
      "loss": 46.0,
      "step": 224
    },
    {
      "epoch": 0.03796987723073029,
      "grad_norm": 0.00022176875791046768,
      "learning_rate": 0.00018930182999298768,
      "loss": 46.0,
      "step": 225
    },
    {
      "epoch": 0.03813863224064464,
      "grad_norm": 0.00029244759934954345,
      "learning_rate": 0.0001892039153394631,
      "loss": 46.0,
      "step": 226
    },
    {
      "epoch": 0.038307387250559,
      "grad_norm": 0.00019421910110395402,
      "learning_rate": 0.0001891055802048872,
      "loss": 46.0,
      "step": 227
    },
    {
      "epoch": 0.038476142260473356,
      "grad_norm": 0.00012899210560135543,
      "learning_rate": 0.00018900682505278287,
      "loss": 46.0,
      "step": 228
    },
    {
      "epoch": 0.03864489727038772,
      "grad_norm": 0.00016150598821695894,
      "learning_rate": 0.00018890765034865295,
      "loss": 46.0,
      "step": 229
    },
    {
      "epoch": 0.03881365228030207,
      "grad_norm": 0.0004213732318021357,
      "learning_rate": 0.00018880805655997784,
      "loss": 46.0,
      "step": 230
    },
    {
      "epoch": 0.03898240729021643,
      "grad_norm": 0.0001324907352682203,
      "learning_rate": 0.0001887080441562134,
      "loss": 46.0,
      "step": 231
    },
    {
      "epoch": 0.039151162300130785,
      "grad_norm": 0.00029545003781095147,
      "learning_rate": 0.0001886076136087887,
      "loss": 46.0,
      "step": 232
    },
    {
      "epoch": 0.039319917310045145,
      "grad_norm": 0.00018010212806984782,
      "learning_rate": 0.00018850676539110386,
      "loss": 46.0,
      "step": 233
    },
    {
      "epoch": 0.0394886723199595,
      "grad_norm": 0.00014782045036554337,
      "learning_rate": 0.00018840549997852776,
      "loss": 46.0,
      "step": 234
    },
    {
      "epoch": 0.03965742732987385,
      "grad_norm": 0.0002910486946348101,
      "learning_rate": 0.0001883038178483958,
      "loss": 46.0,
      "step": 235
    },
    {
      "epoch": 0.03982618233978821,
      "grad_norm": 0.00010300084977643564,
      "learning_rate": 0.00018820171948000764,
      "loss": 46.0,
      "step": 236
    },
    {
      "epoch": 0.03999493734970257,
      "grad_norm": 0.00029963982524350286,
      "learning_rate": 0.00018809920535462502,
      "loss": 46.0,
      "step": 237
    },
    {
      "epoch": 0.04016369235961693,
      "grad_norm": 0.0002475226647220552,
      "learning_rate": 0.00018799627595546942,
      "loss": 46.0,
      "step": 238
    },
    {
      "epoch": 0.04033244736953128,
      "grad_norm": 9.632138244342059e-05,
      "learning_rate": 0.00018789293176771978,
      "loss": 46.0,
      "step": 239
    },
    {
      "epoch": 0.04050120237944564,
      "grad_norm": 0.00018374540377408266,
      "learning_rate": 0.00018778917327851025,
      "loss": 46.0,
      "step": 240
    },
    {
      "epoch": 0.040669957389359995,
      "grad_norm": 0.0005576743860729039,
      "learning_rate": 0.00018768500097692784,
      "loss": 46.0,
      "step": 241
    },
    {
      "epoch": 0.040838712399274356,
      "grad_norm": 0.0002332236763322726,
      "learning_rate": 0.00018758041535401018,
      "loss": 46.0,
      "step": 242
    },
    {
      "epoch": 0.04100746740918871,
      "grad_norm": 0.00021743084653280675,
      "learning_rate": 0.00018747541690274325,
      "loss": 46.0,
      "step": 243
    },
    {
      "epoch": 0.04117622241910307,
      "grad_norm": 0.00037613531458191574,
      "learning_rate": 0.00018737000611805877,
      "loss": 46.0,
      "step": 244
    },
    {
      "epoch": 0.041344977429017424,
      "grad_norm": 0.00017969420878216624,
      "learning_rate": 0.00018726418349683231,
      "loss": 46.0,
      "step": 245
    },
    {
      "epoch": 0.04151373243893178,
      "grad_norm": 0.00021221920906100422,
      "learning_rate": 0.00018715794953788059,
      "loss": 46.0,
      "step": 246
    },
    {
      "epoch": 0.04168248744884614,
      "grad_norm": 0.0002182903845096007,
      "learning_rate": 0.0001870513047419593,
      "loss": 46.0,
      "step": 247
    },
    {
      "epoch": 0.04185124245876049,
      "grad_norm": 0.00023534568026661873,
      "learning_rate": 0.00018694424961176065,
      "loss": 46.0,
      "step": 248
    },
    {
      "epoch": 0.04201999746867485,
      "grad_norm": 0.00013651238987222314,
      "learning_rate": 0.00018683678465191108,
      "loss": 46.0,
      "step": 249
    },
    {
      "epoch": 0.042188752478589206,
      "grad_norm": 0.000251735036727041,
      "learning_rate": 0.00018672891036896884,
      "loss": 46.0,
      "step": 250
    },
    {
      "epoch": 0.04235750748850357,
      "grad_norm": 0.0005651676910929382,
      "learning_rate": 0.00018662062727142165,
      "loss": 46.0,
      "step": 251
    },
    {
      "epoch": 0.04252626249841792,
      "grad_norm": 0.00027543309261091053,
      "learning_rate": 0.00018651193586968417,
      "loss": 46.0,
      "step": 252
    },
    {
      "epoch": 0.04269501750833228,
      "grad_norm": 0.00025643999106250703,
      "learning_rate": 0.00018640283667609574,
      "loss": 46.0,
      "step": 253
    },
    {
      "epoch": 0.042863772518246634,
      "grad_norm": 0.00026899727527052164,
      "learning_rate": 0.00018629333020491796,
      "loss": 46.0,
      "step": 254
    },
    {
      "epoch": 0.043032527528160995,
      "grad_norm": 0.0002454131608828902,
      "learning_rate": 0.00018618341697233213,
      "loss": 46.0,
      "step": 255
    },
    {
      "epoch": 0.04320128253807535,
      "grad_norm": 0.00020008228602819145,
      "learning_rate": 0.0001860730974964369,
      "loss": 46.0,
      "step": 256
    },
    {
      "epoch": 0.04337003754798971,
      "grad_norm": 0.0003180755884386599,
      "learning_rate": 0.00018596237229724595,
      "loss": 46.0,
      "step": 257
    },
    {
      "epoch": 0.04353879255790406,
      "grad_norm": 0.00027137139113619924,
      "learning_rate": 0.0001858512418966853,
      "loss": 46.0,
      "step": 258
    },
    {
      "epoch": 0.04370754756781842,
      "grad_norm": 0.0003248886205255985,
      "learning_rate": 0.000185739706818591,
      "loss": 46.0,
      "step": 259
    },
    {
      "epoch": 0.04387630257773278,
      "grad_norm": 0.00022937578614801168,
      "learning_rate": 0.00018562776758870663,
      "loss": 46.0,
      "step": 260
    },
    {
      "epoch": 0.04404505758764713,
      "grad_norm": 0.00026010029250755906,
      "learning_rate": 0.0001855154247346809,
      "loss": 46.0,
      "step": 261
    },
    {
      "epoch": 0.04421381259756149,
      "grad_norm": 0.00012881477596238256,
      "learning_rate": 0.00018540267878606497,
      "loss": 46.0,
      "step": 262
    },
    {
      "epoch": 0.044382567607475845,
      "grad_norm": 0.0001669221237534657,
      "learning_rate": 0.0001852895302743101,
      "loss": 46.0,
      "step": 263
    },
    {
      "epoch": 0.044551322617390206,
      "grad_norm": 8.989863272290677e-05,
      "learning_rate": 0.0001851759797327652,
      "loss": 46.0,
      "step": 264
    },
    {
      "epoch": 0.04472007762730456,
      "grad_norm": 0.00043415901018306613,
      "learning_rate": 0.00018506202769667413,
      "loss": 46.0,
      "step": 265
    },
    {
      "epoch": 0.04488883263721892,
      "grad_norm": 0.00018308595463167876,
      "learning_rate": 0.00018494767470317333,
      "loss": 46.0,
      "step": 266
    },
    {
      "epoch": 0.045057587647133274,
      "grad_norm": 0.0005433953483588994,
      "learning_rate": 0.00018483292129128914,
      "loss": 46.0,
      "step": 267
    },
    {
      "epoch": 0.045226342657047634,
      "grad_norm": 0.0001997397339437157,
      "learning_rate": 0.00018471776800193553,
      "loss": 46.0,
      "step": 268
    },
    {
      "epoch": 0.04539509766696199,
      "grad_norm": 0.0002549294731579721,
      "learning_rate": 0.00018460221537791122,
      "loss": 46.0,
      "step": 269
    },
    {
      "epoch": 0.04556385267687634,
      "grad_norm": 0.0004634494544006884,
      "learning_rate": 0.00018448626396389738,
      "loss": 46.0,
      "step": 270
    },
    {
      "epoch": 0.0457326076867907,
      "grad_norm": 0.00015057041309773922,
      "learning_rate": 0.00018436991430645488,
      "loss": 46.0,
      "step": 271
    },
    {
      "epoch": 0.045901362696705056,
      "grad_norm": 0.00030916737159714103,
      "learning_rate": 0.00018425316695402181,
      "loss": 46.0,
      "step": 272
    },
    {
      "epoch": 0.046070117706619416,
      "grad_norm": 0.0003056859422940761,
      "learning_rate": 0.00018413602245691092,
      "loss": 46.0,
      "step": 273
    },
    {
      "epoch": 0.04623887271653377,
      "grad_norm": 0.00028438231674954295,
      "learning_rate": 0.00018401848136730698,
      "loss": 46.0,
      "step": 274
    },
    {
      "epoch": 0.04640762772644813,
      "grad_norm": 0.00034854307887144387,
      "learning_rate": 0.00018390054423926406,
      "loss": 46.0,
      "step": 275
    },
    {
      "epoch": 0.046576382736362484,
      "grad_norm": 0.00025173407630063593,
      "learning_rate": 0.00018378221162870326,
      "loss": 46.0,
      "step": 276
    },
    {
      "epoch": 0.046745137746276845,
      "grad_norm": 0.0006683963001705706,
      "learning_rate": 0.00018366348409340965,
      "loss": 46.0,
      "step": 277
    },
    {
      "epoch": 0.0469138927561912,
      "grad_norm": 0.00017825645045377314,
      "learning_rate": 0.00018354436219303,
      "loss": 46.0,
      "step": 278
    },
    {
      "epoch": 0.04708264776610556,
      "grad_norm": 0.00026973988860845566,
      "learning_rate": 0.00018342484648906996,
      "loss": 46.0,
      "step": 279
    },
    {
      "epoch": 0.04725140277601991,
      "grad_norm": 0.00027474435046315193,
      "learning_rate": 0.00018330493754489138,
      "loss": 46.0,
      "step": 280
    },
    {
      "epoch": 0.04742015778593427,
      "grad_norm": 0.00025235096109099686,
      "learning_rate": 0.00018318463592570988,
      "loss": 46.0,
      "step": 281
    },
    {
      "epoch": 0.04758891279584863,
      "grad_norm": 0.00020448811119422317,
      "learning_rate": 0.0001830639421985919,
      "loss": 46.0,
      "step": 282
    },
    {
      "epoch": 0.04775766780576298,
      "grad_norm": 0.00028648230363614857,
      "learning_rate": 0.00018294285693245223,
      "loss": 46.0,
      "step": 283
    },
    {
      "epoch": 0.04792642281567734,
      "grad_norm": 0.00027214092551730573,
      "learning_rate": 0.00018282138069805127,
      "loss": 46.0,
      "step": 284
    },
    {
      "epoch": 0.048095177825591695,
      "grad_norm": 0.00021831200865563005,
      "learning_rate": 0.00018269951406799223,
      "loss": 46.0,
      "step": 285
    },
    {
      "epoch": 0.048263932835506056,
      "grad_norm": 0.000336469296598807,
      "learning_rate": 0.00018257725761671866,
      "loss": 46.0,
      "step": 286
    },
    {
      "epoch": 0.04843268784542041,
      "grad_norm": 0.00034162108204327524,
      "learning_rate": 0.00018245461192051157,
      "loss": 46.0,
      "step": 287
    },
    {
      "epoch": 0.04860144285533477,
      "grad_norm": 0.00035298787406645715,
      "learning_rate": 0.00018233157755748669,
      "loss": 46.0,
      "step": 288
    },
    {
      "epoch": 0.048770197865249124,
      "grad_norm": 0.00015676271868869662,
      "learning_rate": 0.0001822081551075919,
      "loss": 46.0,
      "step": 289
    },
    {
      "epoch": 0.048938952875163484,
      "grad_norm": 0.00023883357062004507,
      "learning_rate": 0.0001820843451526044,
      "loss": 46.0,
      "step": 290
    },
    {
      "epoch": 0.04910770788507784,
      "grad_norm": 0.0002669495588634163,
      "learning_rate": 0.0001819601482761278,
      "loss": 46.0,
      "step": 291
    },
    {
      "epoch": 0.0492764628949922,
      "grad_norm": 0.00034910603426396847,
      "learning_rate": 0.0001818355650635899,
      "loss": 46.0,
      "step": 292
    },
    {
      "epoch": 0.04944521790490655,
      "grad_norm": 0.00024713424500077963,
      "learning_rate": 0.0001817105961022392,
      "loss": 46.0,
      "step": 293
    },
    {
      "epoch": 0.049613972914820906,
      "grad_norm": 0.0002389974833931774,
      "learning_rate": 0.00018158524198114278,
      "loss": 46.0,
      "step": 294
    },
    {
      "epoch": 0.049782727924735266,
      "grad_norm": 0.00028152199229225516,
      "learning_rate": 0.0001814595032911831,
      "loss": 46.0,
      "step": 295
    },
    {
      "epoch": 0.04995148293464962,
      "grad_norm": 0.0004107706481590867,
      "learning_rate": 0.00018133338062505534,
      "loss": 46.0,
      "step": 296
    },
    {
      "epoch": 0.05012023794456398,
      "grad_norm": 0.00032018640195019543,
      "learning_rate": 0.00018120687457726478,
      "loss": 46.0,
      "step": 297
    },
    {
      "epoch": 0.050288992954478334,
      "grad_norm": 0.0003432031662669033,
      "learning_rate": 0.00018107998574412376,
      "loss": 46.0,
      "step": 298
    },
    {
      "epoch": 0.050457747964392695,
      "grad_norm": 0.00042492407374083996,
      "learning_rate": 0.00018095271472374892,
      "loss": 46.0,
      "step": 299
    },
    {
      "epoch": 0.05062650297430705,
      "grad_norm": 0.0004913901793770492,
      "learning_rate": 0.00018082506211605852,
      "loss": 46.0,
      "step": 300
    },
    {
      "epoch": 0.05079525798422141,
      "grad_norm": 0.00045284689986146986,
      "learning_rate": 0.00018069702852276941,
      "loss": 46.0,
      "step": 301
    },
    {
      "epoch": 0.05096401299413576,
      "grad_norm": 0.0002827131829690188,
      "learning_rate": 0.00018056861454739432,
      "loss": 46.0,
      "step": 302
    },
    {
      "epoch": 0.05113276800405012,
      "grad_norm": 0.000371127447579056,
      "learning_rate": 0.00018043982079523905,
      "loss": 46.0,
      "step": 303
    },
    {
      "epoch": 0.05130152301396448,
      "grad_norm": 0.0001193537755170837,
      "learning_rate": 0.00018031064787339947,
      "loss": 46.0,
      "step": 304
    },
    {
      "epoch": 0.05147027802387883,
      "grad_norm": 0.0002617594145704061,
      "learning_rate": 0.00018018109639075886,
      "loss": 46.0,
      "step": 305
    },
    {
      "epoch": 0.05163903303379319,
      "grad_norm": 0.0003127566596958786,
      "learning_rate": 0.00018005116695798476,
      "loss": 46.0,
      "step": 306
    },
    {
      "epoch": 0.051807788043707545,
      "grad_norm": 0.0003426918410696089,
      "learning_rate": 0.00017992086018752638,
      "loss": 46.0,
      "step": 307
    },
    {
      "epoch": 0.051976543053621906,
      "grad_norm": 0.000528005592059344,
      "learning_rate": 0.0001797901766936116,
      "loss": 46.0,
      "step": 308
    },
    {
      "epoch": 0.05214529806353626,
      "grad_norm": 0.00044293675455264747,
      "learning_rate": 0.00017965911709224395,
      "loss": 46.0,
      "step": 309
    },
    {
      "epoch": 0.05231405307345062,
      "grad_norm": 0.0004309279902372509,
      "learning_rate": 0.00017952768200119992,
      "loss": 46.0,
      "step": 310
    },
    {
      "epoch": 0.052482808083364973,
      "grad_norm": 0.00038651516661047935,
      "learning_rate": 0.0001793958720400259,
      "loss": 46.0,
      "step": 311
    },
    {
      "epoch": 0.052651563093279334,
      "grad_norm": 0.000503813847899437,
      "learning_rate": 0.00017926368783003537,
      "loss": 46.0,
      "step": 312
    },
    {
      "epoch": 0.05282031810319369,
      "grad_norm": 0.0007580111850984395,
      "learning_rate": 0.00017913112999430584,
      "loss": 46.0,
      "step": 313
    },
    {
      "epoch": 0.05298907311310805,
      "grad_norm": 0.0003376993117853999,
      "learning_rate": 0.00017899819915767598,
      "loss": 46.0,
      "step": 314
    },
    {
      "epoch": 0.0531578281230224,
      "grad_norm": 0.0003070792299695313,
      "learning_rate": 0.00017886489594674273,
      "loss": 46.0,
      "step": 315
    },
    {
      "epoch": 0.053326583132936756,
      "grad_norm": 0.0006369905895553529,
      "learning_rate": 0.00017873122098985826,
      "loss": 46.0,
      "step": 316
    },
    {
      "epoch": 0.053495338142851116,
      "grad_norm": 0.0005446787690743804,
      "learning_rate": 0.00017859717491712707,
      "loss": 46.0,
      "step": 317
    },
    {
      "epoch": 0.05366409315276547,
      "grad_norm": 0.0003436711267568171,
      "learning_rate": 0.0001784627583604029,
      "loss": 46.0,
      "step": 318
    },
    {
      "epoch": 0.05383284816267983,
      "grad_norm": 0.0004495533648878336,
      "learning_rate": 0.000178327971953286,
      "loss": 46.0,
      "step": 319
    },
    {
      "epoch": 0.054001603172594184,
      "grad_norm": 0.0004997072974219918,
      "learning_rate": 0.00017819281633111984,
      "loss": 46.0,
      "step": 320
    },
    {
      "epoch": 0.054170358182508545,
      "grad_norm": 0.00040087226079776883,
      "learning_rate": 0.0001780572921309883,
      "loss": 46.0,
      "step": 321
    },
    {
      "epoch": 0.0543391131924229,
      "grad_norm": 0.0005316153983585536,
      "learning_rate": 0.0001779213999917127,
      "loss": 46.0,
      "step": 322
    },
    {
      "epoch": 0.05450786820233726,
      "grad_norm": 0.0006078218575567007,
      "learning_rate": 0.00017778514055384866,
      "loss": 46.0,
      "step": 323
    },
    {
      "epoch": 0.05467662321225161,
      "grad_norm": 0.0004828522796742618,
      "learning_rate": 0.00017764851445968308,
      "loss": 46.0,
      "step": 324
    },
    {
      "epoch": 0.05484537822216597,
      "grad_norm": 0.00032997396192513406,
      "learning_rate": 0.0001775115223532313,
      "loss": 46.0,
      "step": 325
    },
    {
      "epoch": 0.05501413323208033,
      "grad_norm": 0.00033607761724852026,
      "learning_rate": 0.00017737416488023384,
      "loss": 46.0,
      "step": 326
    },
    {
      "epoch": 0.05518288824199469,
      "grad_norm": 0.0004014720907434821,
      "learning_rate": 0.00017723644268815344,
      "loss": 46.0,
      "step": 327
    },
    {
      "epoch": 0.05535164325190904,
      "grad_norm": 0.000525740790180862,
      "learning_rate": 0.00017709835642617212,
      "loss": 46.0,
      "step": 328
    },
    {
      "epoch": 0.055520398261823395,
      "grad_norm": 0.0007568416767753661,
      "learning_rate": 0.00017695990674518788,
      "loss": 46.0,
      "step": 329
    },
    {
      "epoch": 0.055689153271737755,
      "grad_norm": 0.0004670285852625966,
      "learning_rate": 0.0001768210942978119,
      "loss": 46.0,
      "step": 330
    },
    {
      "epoch": 0.05585790828165211,
      "grad_norm": 0.0005972511135041714,
      "learning_rate": 0.00017668191973836529,
      "loss": 46.0,
      "step": 331
    },
    {
      "epoch": 0.05602666329156647,
      "grad_norm": 0.00048381276428699493,
      "learning_rate": 0.000176542383722876,
      "loss": 46.0,
      "step": 332
    },
    {
      "epoch": 0.05619541830148082,
      "grad_norm": 0.0007428377284668386,
      "learning_rate": 0.0001764024869090758,
      "loss": 46.0,
      "step": 333
    },
    {
      "epoch": 0.056364173311395184,
      "grad_norm": 0.000608109578024596,
      "learning_rate": 0.00017626222995639724,
      "loss": 46.0,
      "step": 334
    },
    {
      "epoch": 0.05653292832130954,
      "grad_norm": 0.0006571222911588848,
      "learning_rate": 0.00017612161352597032,
      "loss": 46.0,
      "step": 335
    },
    {
      "epoch": 0.0567016833312239,
      "grad_norm": 0.0007276976830326021,
      "learning_rate": 0.00017598063828061958,
      "loss": 46.0,
      "step": 336
    },
    {
      "epoch": 0.05687043834113825,
      "grad_norm": 0.0007646044250577688,
      "learning_rate": 0.000175839304884861,
      "loss": 46.0,
      "step": 337
    },
    {
      "epoch": 0.05703919335105261,
      "grad_norm": 0.0004487757687456906,
      "learning_rate": 0.00017569761400489862,
      "loss": 46.0,
      "step": 338
    },
    {
      "epoch": 0.057207948360966966,
      "grad_norm": 0.0004590119933709502,
      "learning_rate": 0.0001755555663086216,
      "loss": 46.0,
      "step": 339
    },
    {
      "epoch": 0.05737670337088132,
      "grad_norm": 0.00019634263298939914,
      "learning_rate": 0.0001754131624656011,
      "loss": 46.0,
      "step": 340
    },
    {
      "epoch": 0.05754545838079568,
      "grad_norm": 0.0008665765053592622,
      "learning_rate": 0.00017527040314708702,
      "loss": 46.0,
      "step": 341
    },
    {
      "epoch": 0.057714213390710034,
      "grad_norm": 0.0010710041970014572,
      "learning_rate": 0.0001751272890260048,
      "loss": 46.0,
      "step": 342
    },
    {
      "epoch": 0.057882968400624395,
      "grad_norm": 0.0010121267987415195,
      "learning_rate": 0.0001749838207769524,
      "loss": 46.0,
      "step": 343
    },
    {
      "epoch": 0.05805172341053875,
      "grad_norm": 0.0006279960507526994,
      "learning_rate": 0.00017483999907619695,
      "loss": 46.0,
      "step": 344
    },
    {
      "epoch": 0.05822047842045311,
      "grad_norm": 0.0006246848497539759,
      "learning_rate": 0.00017469582460167174,
      "loss": 46.0,
      "step": 345
    },
    {
      "epoch": 0.05838923343036746,
      "grad_norm": 0.000596629804931581,
      "learning_rate": 0.00017455129803297287,
      "loss": 46.0,
      "step": 346
    },
    {
      "epoch": 0.05855798844028182,
      "grad_norm": 0.0008878617081791162,
      "learning_rate": 0.00017440642005135614,
      "loss": 46.0,
      "step": 347
    },
    {
      "epoch": 0.05872674345019618,
      "grad_norm": 0.0003601356002036482,
      "learning_rate": 0.0001742611913397338,
      "loss": 46.0,
      "step": 348
    },
    {
      "epoch": 0.05889549846011054,
      "grad_norm": 0.0003498071455396712,
      "learning_rate": 0.00017411561258267127,
      "loss": 46.0,
      "step": 349
    },
    {
      "epoch": 0.05906425347002489,
      "grad_norm": 0.0007128751021809876,
      "learning_rate": 0.0001739696844663841,
      "loss": 46.0,
      "step": 350
    },
    {
      "epoch": 0.059233008479939245,
      "grad_norm": 0.00047719714348204434,
      "learning_rate": 0.0001738234076787346,
      "loss": 46.0,
      "step": 351
    },
    {
      "epoch": 0.059401763489853605,
      "grad_norm": 0.0006972616538405418,
      "learning_rate": 0.00017367678290922852,
      "loss": 46.0,
      "step": 352
    },
    {
      "epoch": 0.05957051849976796,
      "grad_norm": 0.0005275082658044994,
      "learning_rate": 0.00017352981084901194,
      "loss": 46.0,
      "step": 353
    },
    {
      "epoch": 0.05973927350968232,
      "grad_norm": 0.001250717556104064,
      "learning_rate": 0.000173382492190868,
      "loss": 46.0,
      "step": 354
    },
    {
      "epoch": 0.05990802851959667,
      "grad_norm": 0.0008419329533353448,
      "learning_rate": 0.00017323482762921354,
      "loss": 46.0,
      "step": 355
    },
    {
      "epoch": 0.060076783529511034,
      "grad_norm": 0.0008670453680679202,
      "learning_rate": 0.000173086817860096,
      "loss": 46.0,
      "step": 356
    },
    {
      "epoch": 0.06024553853942539,
      "grad_norm": 0.0014900369569659233,
      "learning_rate": 0.00017293846358118988,
      "loss": 46.0,
      "step": 357
    },
    {
      "epoch": 0.06041429354933975,
      "grad_norm": 0.001586690079420805,
      "learning_rate": 0.0001727897654917937,
      "loss": 46.0,
      "step": 358
    },
    {
      "epoch": 0.0605830485592541,
      "grad_norm": 0.0009236481855623424,
      "learning_rate": 0.00017264072429282656,
      "loss": 46.0,
      "step": 359
    },
    {
      "epoch": 0.06075180356916846,
      "grad_norm": 0.0011958472896367311,
      "learning_rate": 0.00017249134068682487,
      "loss": 46.0,
      "step": 360
    },
    {
      "epoch": 0.060920558579082816,
      "grad_norm": 0.0006783442222513258,
      "learning_rate": 0.00017234161537793913,
      "loss": 46.0,
      "step": 361
    },
    {
      "epoch": 0.06108931358899718,
      "grad_norm": 0.0010664722649380565,
      "learning_rate": 0.0001721915490719304,
      "loss": 46.0,
      "step": 362
    },
    {
      "epoch": 0.06125806859891153,
      "grad_norm": 0.0012871964136138558,
      "learning_rate": 0.00017204114247616715,
      "loss": 46.0,
      "step": 363
    },
    {
      "epoch": 0.061426823608825884,
      "grad_norm": 0.0006326769944280386,
      "learning_rate": 0.00017189039629962193,
      "loss": 46.0,
      "step": 364
    },
    {
      "epoch": 0.061595578618740245,
      "grad_norm": 0.0005951938219368458,
      "learning_rate": 0.00017173931125286792,
      "loss": 46.0,
      "step": 365
    },
    {
      "epoch": 0.061595578618740245,
      "eval_loss": 11.5,
      "eval_runtime": 14.8849,
      "eval_samples_per_second": 167.619,
      "eval_steps_per_second": 83.843,
      "step": 365
    },
    {
      "epoch": 0.0617643336286546,
      "grad_norm": 0.000900912971701473,
      "learning_rate": 0.00017158788804807565,
      "loss": 46.0,
      "step": 366
    },
    {
      "epoch": 0.06193308863856896,
      "grad_norm": 0.0008746191742829978,
      "learning_rate": 0.00017143612739900963,
      "loss": 46.0,
      "step": 367
    },
    {
      "epoch": 0.06210184364848331,
      "grad_norm": 0.0008302325149998069,
      "learning_rate": 0.00017128403002102493,
      "loss": 46.0,
      "step": 368
    },
    {
      "epoch": 0.06227059865839767,
      "grad_norm": 0.0016092363512143493,
      "learning_rate": 0.00017113159663106396,
      "loss": 46.0,
      "step": 369
    },
    {
      "epoch": 0.06243935366831203,
      "grad_norm": 0.0010375389829277992,
      "learning_rate": 0.00017097882794765292,
      "loss": 46.0,
      "step": 370
    },
    {
      "epoch": 0.06260810867822639,
      "grad_norm": 0.0010077969636768103,
      "learning_rate": 0.00017082572469089845,
      "loss": 46.0,
      "step": 371
    },
    {
      "epoch": 0.06277686368814074,
      "grad_norm": 0.0014613711973652244,
      "learning_rate": 0.00017067228758248443,
      "loss": 46.0,
      "step": 372
    },
    {
      "epoch": 0.0629456186980551,
      "grad_norm": 0.0010287724435329437,
      "learning_rate": 0.00017051851734566827,
      "loss": 46.0,
      "step": 373
    },
    {
      "epoch": 0.06311437370796946,
      "grad_norm": 0.0012760079698637128,
      "learning_rate": 0.00017036441470527768,
      "loss": 46.0,
      "step": 374
    },
    {
      "epoch": 0.06328312871788382,
      "grad_norm": 0.001163105363957584,
      "learning_rate": 0.00017020998038770724,
      "loss": 46.0,
      "step": 375
    },
    {
      "epoch": 0.06345188372779817,
      "grad_norm": 0.0008599930442869663,
      "learning_rate": 0.00017005521512091493,
      "loss": 46.0,
      "step": 376
    },
    {
      "epoch": 0.06362063873771252,
      "grad_norm": 0.001305455924011767,
      "learning_rate": 0.0001699001196344188,
      "loss": 46.0,
      "step": 377
    },
    {
      "epoch": 0.06378939374762688,
      "grad_norm": 0.0006688539870083332,
      "learning_rate": 0.00016974469465929338,
      "loss": 46.0,
      "step": 378
    },
    {
      "epoch": 0.06395814875754124,
      "grad_norm": 0.0010529232677072287,
      "learning_rate": 0.00016958894092816636,
      "loss": 46.0,
      "step": 379
    },
    {
      "epoch": 0.0641269037674556,
      "grad_norm": 0.00040389568312093616,
      "learning_rate": 0.00016943285917521506,
      "loss": 46.0,
      "step": 380
    },
    {
      "epoch": 0.06429565877736995,
      "grad_norm": 0.0007607465959154069,
      "learning_rate": 0.00016927645013616301,
      "loss": 46.0,
      "step": 381
    },
    {
      "epoch": 0.0644644137872843,
      "grad_norm": 0.0007972440216690302,
      "learning_rate": 0.0001691197145482765,
      "loss": 46.0,
      "step": 382
    },
    {
      "epoch": 0.06463316879719867,
      "grad_norm": 0.0007731697405688465,
      "learning_rate": 0.00016896265315036098,
      "loss": 46.0,
      "step": 383
    },
    {
      "epoch": 0.06480192380711303,
      "grad_norm": 0.0010464886436238885,
      "learning_rate": 0.00016880526668275783,
      "loss": 46.0,
      "step": 384
    },
    {
      "epoch": 0.06497067881702738,
      "grad_norm": 0.0012620363850146532,
      "learning_rate": 0.00016864755588734057,
      "loss": 46.0,
      "step": 385
    },
    {
      "epoch": 0.06513943382694173,
      "grad_norm": 0.0008887553703971207,
      "learning_rate": 0.00016848952150751154,
      "loss": 46.0,
      "step": 386
    },
    {
      "epoch": 0.06530818883685609,
      "grad_norm": 0.0008710179827176034,
      "learning_rate": 0.0001683311642881984,
      "loss": 46.0,
      "step": 387
    },
    {
      "epoch": 0.06547694384677046,
      "grad_norm": 0.0006600015913136303,
      "learning_rate": 0.0001681724849758506,
      "loss": 46.0,
      "step": 388
    },
    {
      "epoch": 0.06564569885668481,
      "grad_norm": 0.0011270915856584907,
      "learning_rate": 0.00016801348431843573,
      "loss": 46.0,
      "step": 389
    },
    {
      "epoch": 0.06581445386659916,
      "grad_norm": 0.0004072414885740727,
      "learning_rate": 0.00016785416306543626,
      "loss": 46.0,
      "step": 390
    },
    {
      "epoch": 0.06598320887651352,
      "grad_norm": 0.0013463982613757253,
      "learning_rate": 0.0001676945219678457,
      "loss": 46.0,
      "step": 391
    },
    {
      "epoch": 0.06615196388642788,
      "grad_norm": 0.0008421620586887002,
      "learning_rate": 0.00016753456177816535,
      "loss": 46.0,
      "step": 392
    },
    {
      "epoch": 0.06632071889634224,
      "grad_norm": 0.0011627651983872056,
      "learning_rate": 0.00016737428325040048,
      "loss": 46.0,
      "step": 393
    },
    {
      "epoch": 0.06648947390625659,
      "grad_norm": 0.0010495700407773256,
      "learning_rate": 0.00016721368714005705,
      "loss": 46.0,
      "step": 394
    },
    {
      "epoch": 0.06665822891617094,
      "grad_norm": 0.001558567862957716,
      "learning_rate": 0.0001670527742041379,
      "loss": 46.0,
      "step": 395
    },
    {
      "epoch": 0.06682698392608531,
      "grad_norm": 0.0007277853437699378,
      "learning_rate": 0.0001668915452011394,
      "loss": 46.0,
      "step": 396
    },
    {
      "epoch": 0.06699573893599967,
      "grad_norm": 0.0010374211706221104,
      "learning_rate": 0.0001667300008910476,
      "loss": 46.0,
      "step": 397
    },
    {
      "epoch": 0.06716449394591402,
      "grad_norm": 0.0010681662242859602,
      "learning_rate": 0.000166568142035335,
      "loss": 46.0,
      "step": 398
    },
    {
      "epoch": 0.06733324895582837,
      "grad_norm": 0.0012568504316732287,
      "learning_rate": 0.00016640596939695673,
      "loss": 46.0,
      "step": 399
    },
    {
      "epoch": 0.06750200396574273,
      "grad_norm": 0.0010285298340022564,
      "learning_rate": 0.00016624348374034685,
      "loss": 46.0,
      "step": 400
    },
    {
      "epoch": 0.0676707589756571,
      "grad_norm": 0.0012999747414141893,
      "learning_rate": 0.0001660806858314151,
      "loss": 46.0,
      "step": 401
    },
    {
      "epoch": 0.06783951398557145,
      "grad_norm": 0.0010224470170214772,
      "learning_rate": 0.000165917576437543,
      "loss": 46.0,
      "step": 402
    },
    {
      "epoch": 0.0680082689954858,
      "grad_norm": 0.001064454554580152,
      "learning_rate": 0.00016575415632758027,
      "loss": 46.0,
      "step": 403
    },
    {
      "epoch": 0.06817702400540016,
      "grad_norm": 0.0012302878312766552,
      "learning_rate": 0.0001655904262718413,
      "loss": 46.0,
      "step": 404
    },
    {
      "epoch": 0.06834577901531452,
      "grad_norm": 0.0010292811784893274,
      "learning_rate": 0.00016542638704210153,
      "loss": 46.0,
      "step": 405
    },
    {
      "epoch": 0.06851453402522888,
      "grad_norm": 0.0017992197535932064,
      "learning_rate": 0.00016526203941159365,
      "loss": 46.0,
      "step": 406
    },
    {
      "epoch": 0.06868328903514323,
      "grad_norm": 0.0012390941847115755,
      "learning_rate": 0.00016509738415500412,
      "loss": 46.0,
      "step": 407
    },
    {
      "epoch": 0.06885204404505758,
      "grad_norm": 0.0012958311708644032,
      "learning_rate": 0.00016493242204846945,
      "loss": 46.0,
      "step": 408
    },
    {
      "epoch": 0.06902079905497195,
      "grad_norm": 0.0008734130533412099,
      "learning_rate": 0.00016476715386957256,
      "loss": 46.0,
      "step": 409
    },
    {
      "epoch": 0.0691895540648863,
      "grad_norm": 0.0009476335253566504,
      "learning_rate": 0.00016460158039733908,
      "loss": 46.0,
      "step": 410
    },
    {
      "epoch": 0.06935830907480066,
      "grad_norm": 0.0008190583321265876,
      "learning_rate": 0.0001644357024122337,
      "loss": 46.0,
      "step": 411
    },
    {
      "epoch": 0.06952706408471501,
      "grad_norm": 0.0010068168630823493,
      "learning_rate": 0.00016426952069615656,
      "loss": 46.0,
      "step": 412
    },
    {
      "epoch": 0.06969581909462937,
      "grad_norm": 0.0009863304439932108,
      "learning_rate": 0.00016410303603243943,
      "loss": 46.0,
      "step": 413
    },
    {
      "epoch": 0.06986457410454373,
      "grad_norm": 0.0013538243947550654,
      "learning_rate": 0.0001639362492058421,
      "loss": 46.0,
      "step": 414
    },
    {
      "epoch": 0.07003332911445809,
      "grad_norm": 0.001239404664374888,
      "learning_rate": 0.00016376916100254864,
      "loss": 46.0,
      "step": 415
    },
    {
      "epoch": 0.07020208412437244,
      "grad_norm": 0.0022674521896988153,
      "learning_rate": 0.0001636017722101638,
      "loss": 46.0,
      "step": 416
    },
    {
      "epoch": 0.0703708391342868,
      "grad_norm": 0.0006947132642380893,
      "learning_rate": 0.00016343408361770915,
      "loss": 46.0,
      "step": 417
    },
    {
      "epoch": 0.07053959414420116,
      "grad_norm": 0.0007783303153701127,
      "learning_rate": 0.00016326609601561952,
      "loss": 46.0,
      "step": 418
    },
    {
      "epoch": 0.07070834915411552,
      "grad_norm": 0.0016195345669984818,
      "learning_rate": 0.000163097810195739,
      "loss": 46.0,
      "step": 419
    },
    {
      "epoch": 0.07087710416402987,
      "grad_norm": 0.000915382755920291,
      "learning_rate": 0.00016292922695131755,
      "loss": 46.0,
      "step": 420
    },
    {
      "epoch": 0.07104585917394422,
      "grad_norm": 0.001205647480674088,
      "learning_rate": 0.00016276034707700713,
      "loss": 46.0,
      "step": 421
    },
    {
      "epoch": 0.07121461418385858,
      "grad_norm": 0.0012716164346784353,
      "learning_rate": 0.0001625911713688578,
      "loss": 46.0,
      "step": 422
    },
    {
      "epoch": 0.07138336919377294,
      "grad_norm": 0.0010019976180046797,
      "learning_rate": 0.0001624217006243141,
      "loss": 46.0,
      "step": 423
    },
    {
      "epoch": 0.0715521242036873,
      "grad_norm": 0.0013898113975301385,
      "learning_rate": 0.00016225193564221142,
      "loss": 46.0,
      "step": 424
    },
    {
      "epoch": 0.07172087921360165,
      "grad_norm": 0.0012531159445643425,
      "learning_rate": 0.000162081877222772,
      "loss": 46.0,
      "step": 425
    },
    {
      "epoch": 0.071889634223516,
      "grad_norm": 0.0008526226156391203,
      "learning_rate": 0.0001619115261676012,
      "loss": 46.0,
      "step": 426
    },
    {
      "epoch": 0.07205838923343037,
      "grad_norm": 0.0010567533317953348,
      "learning_rate": 0.00016174088327968394,
      "loss": 46.0,
      "step": 427
    },
    {
      "epoch": 0.07222714424334473,
      "grad_norm": 0.0010411246912553906,
      "learning_rate": 0.0001615699493633806,
      "loss": 46.0,
      "step": 428
    },
    {
      "epoch": 0.07239589925325908,
      "grad_norm": 0.001222698949277401,
      "learning_rate": 0.00016139872522442347,
      "loss": 46.0,
      "step": 429
    },
    {
      "epoch": 0.07256465426317343,
      "grad_norm": 0.0014631192898377776,
      "learning_rate": 0.00016122721166991286,
      "loss": 46.0,
      "step": 430
    },
    {
      "epoch": 0.0727334092730878,
      "grad_norm": 0.0016150050796568394,
      "learning_rate": 0.00016105540950831323,
      "loss": 46.0,
      "step": 431
    },
    {
      "epoch": 0.07290216428300215,
      "grad_norm": 0.0011149218771606684,
      "learning_rate": 0.00016088331954944952,
      "loss": 46.0,
      "step": 432
    },
    {
      "epoch": 0.07307091929291651,
      "grad_norm": 0.0016063664807006717,
      "learning_rate": 0.00016071094260450327,
      "loss": 46.0,
      "step": 433
    },
    {
      "epoch": 0.07323967430283086,
      "grad_norm": 0.0010584626579657197,
      "learning_rate": 0.00016053827948600872,
      "loss": 46.0,
      "step": 434
    },
    {
      "epoch": 0.07340842931274522,
      "grad_norm": 0.0011532072676345706,
      "learning_rate": 0.00016036533100784913,
      "loss": 46.0,
      "step": 435
    },
    {
      "epoch": 0.07357718432265958,
      "grad_norm": 0.0015770825557410717,
      "learning_rate": 0.00016019209798525278,
      "loss": 46.0,
      "step": 436
    },
    {
      "epoch": 0.07374593933257394,
      "grad_norm": 0.0012206478277221322,
      "learning_rate": 0.00016001858123478927,
      "loss": 46.0,
      "step": 437
    },
    {
      "epoch": 0.07391469434248829,
      "grad_norm": 0.0011760505149140954,
      "learning_rate": 0.00015984478157436558,
      "loss": 46.0,
      "step": 438
    },
    {
      "epoch": 0.07408344935240264,
      "grad_norm": 0.0006681337836198509,
      "learning_rate": 0.00015967069982322223,
      "loss": 46.0,
      "step": 439
    },
    {
      "epoch": 0.07425220436231701,
      "grad_norm": 0.0010028331307694316,
      "learning_rate": 0.0001594963368019295,
      "loss": 46.0,
      "step": 440
    },
    {
      "epoch": 0.07442095937223137,
      "grad_norm": 0.0014602706069126725,
      "learning_rate": 0.0001593216933323834,
      "loss": 46.0,
      "step": 441
    },
    {
      "epoch": 0.07458971438214572,
      "grad_norm": 0.00084131263429299,
      "learning_rate": 0.000159146770237802,
      "loss": 46.0,
      "step": 442
    },
    {
      "epoch": 0.07475846939206007,
      "grad_norm": 0.0013664717553183436,
      "learning_rate": 0.00015897156834272132,
      "loss": 46.0,
      "step": 443
    },
    {
      "epoch": 0.07492722440197444,
      "grad_norm": 0.0013984786346554756,
      "learning_rate": 0.00015879608847299163,
      "loss": 46.0,
      "step": 444
    },
    {
      "epoch": 0.0750959794118888,
      "grad_norm": 0.0006391989882104099,
      "learning_rate": 0.00015862033145577352,
      "loss": 46.0,
      "step": 445
    },
    {
      "epoch": 0.07526473442180315,
      "grad_norm": 0.0014059271197766066,
      "learning_rate": 0.00015844429811953393,
      "loss": 46.0,
      "step": 446
    },
    {
      "epoch": 0.0754334894317175,
      "grad_norm": 0.0005209510563872755,
      "learning_rate": 0.00015826798929404227,
      "loss": 46.0,
      "step": 447
    },
    {
      "epoch": 0.07560224444163186,
      "grad_norm": 0.0006487764185294509,
      "learning_rate": 0.00015809140581036658,
      "loss": 46.0,
      "step": 448
    },
    {
      "epoch": 0.07577099945154622,
      "grad_norm": 0.000935686519369483,
      "learning_rate": 0.0001579145485008695,
      "loss": 46.0,
      "step": 449
    },
    {
      "epoch": 0.07593975446146058,
      "grad_norm": 0.001003348152153194,
      "learning_rate": 0.00015773741819920446,
      "loss": 46.0,
      "step": 450
    },
    {
      "epoch": 0.07610850947137493,
      "grad_norm": 0.0011402338277548552,
      "learning_rate": 0.00015756001574031162,
      "loss": 46.0,
      "step": 451
    },
    {
      "epoch": 0.07627726448128928,
      "grad_norm": 0.0009165026131086051,
      "learning_rate": 0.00015738234196041408,
      "loss": 46.0,
      "step": 452
    },
    {
      "epoch": 0.07644601949120365,
      "grad_norm": 0.0012046222109347582,
      "learning_rate": 0.00015720439769701384,
      "loss": 46.0,
      "step": 453
    },
    {
      "epoch": 0.076614774501118,
      "grad_norm": 0.0013616887154057622,
      "learning_rate": 0.00015702618378888791,
      "loss": 46.0,
      "step": 454
    },
    {
      "epoch": 0.07678352951103236,
      "grad_norm": 0.0010609148303046823,
      "learning_rate": 0.00015684770107608428,
      "loss": 46.0,
      "step": 455
    },
    {
      "epoch": 0.07695228452094671,
      "grad_norm": 0.0010137903736904263,
      "learning_rate": 0.000156668950399918,
      "loss": 46.0,
      "step": 456
    },
    {
      "epoch": 0.07712103953086107,
      "grad_norm": 0.0008188852807506919,
      "learning_rate": 0.0001564899326029673,
      "loss": 46.0,
      "step": 457
    },
    {
      "epoch": 0.07728979454077543,
      "grad_norm": 0.001042909687384963,
      "learning_rate": 0.00015631064852906942,
      "loss": 46.0,
      "step": 458
    },
    {
      "epoch": 0.07745854955068979,
      "grad_norm": 0.001120585948228836,
      "learning_rate": 0.00015613109902331686,
      "loss": 46.0,
      "step": 459
    },
    {
      "epoch": 0.07762730456060414,
      "grad_norm": 0.0009330071043223143,
      "learning_rate": 0.00015595128493205325,
      "loss": 46.0,
      "step": 460
    },
    {
      "epoch": 0.0777960595705185,
      "grad_norm": 0.0011058711679652333,
      "learning_rate": 0.00015577120710286933,
      "loss": 46.0,
      "step": 461
    },
    {
      "epoch": 0.07796481458043286,
      "grad_norm": 0.0009043689933605492,
      "learning_rate": 0.00015559086638459917,
      "loss": 46.0,
      "step": 462
    },
    {
      "epoch": 0.07813356959034722,
      "grad_norm": 0.0009576305164955556,
      "learning_rate": 0.00015541026362731586,
      "loss": 46.0,
      "step": 463
    },
    {
      "epoch": 0.07830232460026157,
      "grad_norm": 0.0006179432384669781,
      "learning_rate": 0.0001552293996823278,
      "loss": 46.0,
      "step": 464
    },
    {
      "epoch": 0.07847107961017592,
      "grad_norm": 0.0009595199953764677,
      "learning_rate": 0.00015504827540217445,
      "loss": 46.0,
      "step": 465
    },
    {
      "epoch": 0.07863983462009029,
      "grad_norm": 0.0005762246437370777,
      "learning_rate": 0.00015486689164062248,
      "loss": 46.0,
      "step": 466
    },
    {
      "epoch": 0.07880858963000464,
      "grad_norm": 0.0010678599355742335,
      "learning_rate": 0.0001546852492526617,
      "loss": 46.0,
      "step": 467
    },
    {
      "epoch": 0.078977344639919,
      "grad_norm": 0.001243466162122786,
      "learning_rate": 0.00015450334909450087,
      "loss": 46.0,
      "step": 468
    },
    {
      "epoch": 0.07914609964983335,
      "grad_norm": 0.0010546196717768908,
      "learning_rate": 0.00015432119202356398,
      "loss": 46.0,
      "step": 469
    },
    {
      "epoch": 0.0793148546597477,
      "grad_norm": 0.0006426689797081053,
      "learning_rate": 0.0001541387788984859,
      "loss": 46.0,
      "step": 470
    },
    {
      "epoch": 0.07948360966966207,
      "grad_norm": 0.0011058398522436619,
      "learning_rate": 0.00015395611057910852,
      "loss": 46.0,
      "step": 471
    },
    {
      "epoch": 0.07965236467957643,
      "grad_norm": 0.0012805294245481491,
      "learning_rate": 0.0001537731879264767,
      "loss": 46.0,
      "step": 472
    },
    {
      "epoch": 0.07982111968949078,
      "grad_norm": 0.001057624351233244,
      "learning_rate": 0.000153590011802834,
      "loss": 46.0,
      "step": 473
    },
    {
      "epoch": 0.07998987469940513,
      "grad_norm": 0.0009916762355715036,
      "learning_rate": 0.00015340658307161885,
      "loss": 46.0,
      "step": 474
    },
    {
      "epoch": 0.0801586297093195,
      "grad_norm": 0.0011284681968390942,
      "learning_rate": 0.0001532229025974605,
      "loss": 46.0,
      "step": 475
    },
    {
      "epoch": 0.08032738471923385,
      "grad_norm": 0.0010007736273109913,
      "learning_rate": 0.00015303897124617467,
      "loss": 46.0,
      "step": 476
    },
    {
      "epoch": 0.08049613972914821,
      "grad_norm": 0.001305941492319107,
      "learning_rate": 0.00015285478988475972,
      "loss": 46.0,
      "step": 477
    },
    {
      "epoch": 0.08066489473906256,
      "grad_norm": 0.0006156846648082137,
      "learning_rate": 0.00015267035938139252,
      "loss": 46.0,
      "step": 478
    },
    {
      "epoch": 0.08083364974897693,
      "grad_norm": 0.0009203223744407296,
      "learning_rate": 0.00015248568060542423,
      "loss": 46.0,
      "step": 479
    },
    {
      "epoch": 0.08100240475889128,
      "grad_norm": 0.0008335929596796632,
      "learning_rate": 0.00015230075442737638,
      "loss": 46.0,
      "step": 480
    },
    {
      "epoch": 0.08117115976880564,
      "grad_norm": 0.001341037219390273,
      "learning_rate": 0.00015211558171893664,
      "loss": 46.0,
      "step": 481
    },
    {
      "epoch": 0.08133991477871999,
      "grad_norm": 0.0010233522625640035,
      "learning_rate": 0.00015193016335295477,
      "loss": 46.0,
      "step": 482
    },
    {
      "epoch": 0.08150866978863434,
      "grad_norm": 0.000823578389827162,
      "learning_rate": 0.00015174450020343842,
      "loss": 46.0,
      "step": 483
    },
    {
      "epoch": 0.08167742479854871,
      "grad_norm": 0.00170811521820724,
      "learning_rate": 0.00015155859314554924,
      "loss": 46.0,
      "step": 484
    },
    {
      "epoch": 0.08184617980846307,
      "grad_norm": 0.0008304553339257836,
      "learning_rate": 0.00015137244305559838,
      "loss": 46.0,
      "step": 485
    },
    {
      "epoch": 0.08201493481837742,
      "grad_norm": 0.0009338143863715231,
      "learning_rate": 0.00015118605081104275,
      "loss": 46.0,
      "step": 486
    },
    {
      "epoch": 0.08218368982829177,
      "grad_norm": 0.0007202426786534488,
      "learning_rate": 0.0001509994172904806,
      "loss": 46.0,
      "step": 487
    },
    {
      "epoch": 0.08235244483820614,
      "grad_norm": 0.0007982858805917203,
      "learning_rate": 0.00015081254337364754,
      "loss": 46.0,
      "step": 488
    },
    {
      "epoch": 0.0825211998481205,
      "grad_norm": 0.0007698725094087422,
      "learning_rate": 0.00015062542994141232,
      "loss": 46.0,
      "step": 489
    },
    {
      "epoch": 0.08268995485803485,
      "grad_norm": 0.0010482663055881858,
      "learning_rate": 0.00015043807787577262,
      "loss": 46.0,
      "step": 490
    },
    {
      "epoch": 0.0828587098679492,
      "grad_norm": 0.0014217188581824303,
      "learning_rate": 0.00015025048805985113,
      "loss": 46.0,
      "step": 491
    },
    {
      "epoch": 0.08302746487786355,
      "grad_norm": 0.0014352472499012947,
      "learning_rate": 0.00015006266137789108,
      "loss": 46.0,
      "step": 492
    },
    {
      "epoch": 0.08319621988777792,
      "grad_norm": 0.001194497337564826,
      "learning_rate": 0.0001498745987152523,
      "loss": 46.0,
      "step": 493
    },
    {
      "epoch": 0.08336497489769228,
      "grad_norm": 0.0008624936453998089,
      "learning_rate": 0.0001496863009584069,
      "loss": 46.0,
      "step": 494
    },
    {
      "epoch": 0.08353372990760663,
      "grad_norm": 0.001185000641271472,
      "learning_rate": 0.00014949776899493523,
      "loss": 46.0,
      "step": 495
    },
    {
      "epoch": 0.08370248491752098,
      "grad_norm": 0.0006659825448878109,
      "learning_rate": 0.00014930900371352157,
      "loss": 46.0,
      "step": 496
    },
    {
      "epoch": 0.08387123992743535,
      "grad_norm": 0.0014636954292654991,
      "learning_rate": 0.00014912000600394995,
      "loss": 46.0,
      "step": 497
    },
    {
      "epoch": 0.0840399949373497,
      "grad_norm": 0.0007161729736253619,
      "learning_rate": 0.00014893077675710013,
      "loss": 46.0,
      "step": 498
    },
    {
      "epoch": 0.08420874994726406,
      "grad_norm": 0.0008941322448663414,
      "learning_rate": 0.00014874131686494314,
      "loss": 46.0,
      "step": 499
    },
    {
      "epoch": 0.08437750495717841,
      "grad_norm": 0.0007309920038096607,
      "learning_rate": 0.00014855162722053725,
      "loss": 46.0,
      "step": 500
    },
    {
      "epoch": 0.08454625996709278,
      "grad_norm": 0.0008992912480607629,
      "learning_rate": 0.00014836170871802379,
      "loss": 46.0,
      "step": 501
    },
    {
      "epoch": 0.08471501497700713,
      "grad_norm": 0.001996345119550824,
      "learning_rate": 0.0001481715622526227,
      "loss": 46.0,
      "step": 502
    },
    {
      "epoch": 0.08488376998692149,
      "grad_norm": 0.001881232368759811,
      "learning_rate": 0.00014798118872062855,
      "loss": 46.0,
      "step": 503
    },
    {
      "epoch": 0.08505252499683584,
      "grad_norm": 0.00150417466647923,
      "learning_rate": 0.00014779058901940635,
      "loss": 46.0,
      "step": 504
    },
    {
      "epoch": 0.0852212800067502,
      "grad_norm": 0.0018176480662077665,
      "learning_rate": 0.000147599764047387,
      "loss": 46.0,
      "step": 505
    },
    {
      "epoch": 0.08539003501666456,
      "grad_norm": 0.0010128653375431895,
      "learning_rate": 0.00014740871470406342,
      "loss": 46.0,
      "step": 506
    },
    {
      "epoch": 0.08555879002657892,
      "grad_norm": 0.0005888533196412027,
      "learning_rate": 0.00014721744188998605,
      "loss": 46.0,
      "step": 507
    },
    {
      "epoch": 0.08572754503649327,
      "grad_norm": 0.0010933991288766265,
      "learning_rate": 0.00014702594650675872,
      "loss": 46.0,
      "step": 508
    },
    {
      "epoch": 0.08589630004640762,
      "grad_norm": 0.0009146520169451833,
      "learning_rate": 0.00014683422945703452,
      "loss": 46.0,
      "step": 509
    },
    {
      "epoch": 0.08606505505632199,
      "grad_norm": 0.0010995094198733568,
      "learning_rate": 0.0001466422916445112,
      "loss": 46.0,
      "step": 510
    },
    {
      "epoch": 0.08623381006623634,
      "grad_norm": 0.001196429249830544,
      "learning_rate": 0.00014645013397392723,
      "loss": 46.0,
      "step": 511
    },
    {
      "epoch": 0.0864025650761507,
      "grad_norm": 0.0016038153553381562,
      "learning_rate": 0.00014625775735105744,
      "loss": 46.0,
      "step": 512
    },
    {
      "epoch": 0.08657132008606505,
      "grad_norm": 0.00237817014567554,
      "learning_rate": 0.00014606516268270874,
      "loss": 46.0,
      "step": 513
    },
    {
      "epoch": 0.08674007509597942,
      "grad_norm": 0.0008629761869087815,
      "learning_rate": 0.00014587235087671577,
      "loss": 46.0,
      "step": 514
    },
    {
      "epoch": 0.08690883010589377,
      "grad_norm": 0.0009417358669452369,
      "learning_rate": 0.0001456793228419368,
      "loss": 46.0,
      "step": 515
    },
    {
      "epoch": 0.08707758511580813,
      "grad_norm": 0.0004477721522562206,
      "learning_rate": 0.0001454860794882492,
      "loss": 46.0,
      "step": 516
    },
    {
      "epoch": 0.08724634012572248,
      "grad_norm": 0.001148428302258253,
      "learning_rate": 0.00014529262172654545,
      "loss": 46.0,
      "step": 517
    },
    {
      "epoch": 0.08741509513563683,
      "grad_norm": 0.001068097772076726,
      "learning_rate": 0.00014509895046872854,
      "loss": 46.0,
      "step": 518
    },
    {
      "epoch": 0.0875838501455512,
      "grad_norm": 0.0017000689404085279,
      "learning_rate": 0.00014490506662770796,
      "loss": 46.0,
      "step": 519
    },
    {
      "epoch": 0.08775260515546555,
      "grad_norm": 0.0009183180518448353,
      "learning_rate": 0.0001447109711173951,
      "loss": 46.0,
      "step": 520
    },
    {
      "epoch": 0.08792136016537991,
      "grad_norm": 0.0010085629764944315,
      "learning_rate": 0.0001445166648526992,
      "loss": 46.0,
      "step": 521
    },
    {
      "epoch": 0.08809011517529426,
      "grad_norm": 0.0015139034949243069,
      "learning_rate": 0.00014432214874952296,
      "loss": 46.0,
      "step": 522
    },
    {
      "epoch": 0.08825887018520863,
      "grad_norm": 0.001635671011172235,
      "learning_rate": 0.00014412742372475808,
      "loss": 46.0,
      "step": 523
    },
    {
      "epoch": 0.08842762519512298,
      "grad_norm": 0.0006598219624720514,
      "learning_rate": 0.00014393249069628112,
      "loss": 46.0,
      "step": 524
    },
    {
      "epoch": 0.08859638020503734,
      "grad_norm": 0.0005806029075756669,
      "learning_rate": 0.00014373735058294918,
      "loss": 46.0,
      "step": 525
    },
    {
      "epoch": 0.08876513521495169,
      "grad_norm": 0.0013370051747187972,
      "learning_rate": 0.00014354200430459537,
      "loss": 46.0,
      "step": 526
    },
    {
      "epoch": 0.08893389022486604,
      "grad_norm": 0.001640844508074224,
      "learning_rate": 0.0001433464527820247,
      "loss": 46.0,
      "step": 527
    },
    {
      "epoch": 0.08910264523478041,
      "grad_norm": 0.0014307728270068765,
      "learning_rate": 0.00014315069693700955,
      "loss": 46.0,
      "step": 528
    },
    {
      "epoch": 0.08927140024469477,
      "grad_norm": 0.0017885541310533881,
      "learning_rate": 0.00014295473769228547,
      "loss": 46.0,
      "step": 529
    },
    {
      "epoch": 0.08944015525460912,
      "grad_norm": 0.0008539075497537851,
      "learning_rate": 0.00014275857597154684,
      "loss": 46.0,
      "step": 530
    },
    {
      "epoch": 0.08960891026452347,
      "grad_norm": 0.0015051423106342554,
      "learning_rate": 0.0001425622126994423,
      "loss": 46.0,
      "step": 531
    },
    {
      "epoch": 0.08977766527443784,
      "grad_norm": 0.0010912073776125908,
      "learning_rate": 0.0001423656488015707,
      "loss": 46.0,
      "step": 532
    },
    {
      "epoch": 0.0899464202843522,
      "grad_norm": 0.002204689895734191,
      "learning_rate": 0.00014216888520447648,
      "loss": 46.0,
      "step": 533
    },
    {
      "epoch": 0.09011517529426655,
      "grad_norm": 0.0009545715292915702,
      "learning_rate": 0.00014197192283564542,
      "loss": 46.0,
      "step": 534
    },
    {
      "epoch": 0.0902839303041809,
      "grad_norm": 0.0008314741426147521,
      "learning_rate": 0.00014177476262350027,
      "loss": 46.0,
      "step": 535
    },
    {
      "epoch": 0.09045268531409527,
      "grad_norm": 0.0015365943545475602,
      "learning_rate": 0.00014157740549739637,
      "loss": 46.0,
      "step": 536
    },
    {
      "epoch": 0.09062144032400962,
      "grad_norm": 0.0020499713718891144,
      "learning_rate": 0.00014137985238761718,
      "loss": 46.0,
      "step": 537
    },
    {
      "epoch": 0.09079019533392398,
      "grad_norm": 0.0008071595220826566,
      "learning_rate": 0.0001411821042253701,
      "loss": 46.0,
      "step": 538
    },
    {
      "epoch": 0.09095895034383833,
      "grad_norm": 0.0009972534608095884,
      "learning_rate": 0.0001409841619427818,
      "loss": 46.0,
      "step": 539
    },
    {
      "epoch": 0.09112770535375268,
      "grad_norm": 0.000979638658463955,
      "learning_rate": 0.00014078602647289414,
      "loss": 46.0,
      "step": 540
    },
    {
      "epoch": 0.09129646036366705,
      "grad_norm": 0.0021402165293693542,
      "learning_rate": 0.00014058769874965944,
      "loss": 46.0,
      "step": 541
    },
    {
      "epoch": 0.0914652153735814,
      "grad_norm": 0.001012645079754293,
      "learning_rate": 0.00014038917970793634,
      "loss": 46.0,
      "step": 542
    },
    {
      "epoch": 0.09163397038349576,
      "grad_norm": 0.0011463590199127793,
      "learning_rate": 0.0001401904702834853,
      "loss": 46.0,
      "step": 543
    },
    {
      "epoch": 0.09180272539341011,
      "grad_norm": 0.0007347980281338096,
      "learning_rate": 0.00013999157141296418,
      "loss": 46.0,
      "step": 544
    },
    {
      "epoch": 0.09197148040332448,
      "grad_norm": 0.001035605208016932,
      "learning_rate": 0.0001397924840339238,
      "loss": 46.0,
      "step": 545
    },
    {
      "epoch": 0.09214023541323883,
      "grad_norm": 0.0019607653375715017,
      "learning_rate": 0.0001395932090848036,
      "loss": 46.0,
      "step": 546
    },
    {
      "epoch": 0.09230899042315319,
      "grad_norm": 0.0016435689758509398,
      "learning_rate": 0.0001393937475049271,
      "loss": 46.0,
      "step": 547
    },
    {
      "epoch": 0.09247774543306754,
      "grad_norm": 0.0011168678756803274,
      "learning_rate": 0.00013919410023449769,
      "loss": 46.0,
      "step": 548
    },
    {
      "epoch": 0.09264650044298191,
      "grad_norm": 0.000912736461032182,
      "learning_rate": 0.00013899426821459385,
      "loss": 46.0,
      "step": 549
    },
    {
      "epoch": 0.09281525545289626,
      "grad_norm": 0.0017240199958905578,
      "learning_rate": 0.00013879425238716504,
      "loss": 46.0,
      "step": 550
    },
    {
      "epoch": 0.09298401046281062,
      "grad_norm": 0.00043686252320185304,
      "learning_rate": 0.00013859405369502718,
      "loss": 46.0,
      "step": 551
    },
    {
      "epoch": 0.09315276547272497,
      "grad_norm": 0.0015362072736024857,
      "learning_rate": 0.000138393673081858,
      "loss": 46.0,
      "step": 552
    },
    {
      "epoch": 0.09332152048263932,
      "grad_norm": 0.0010787771316245198,
      "learning_rate": 0.0001381931114921929,
      "loss": 46.0,
      "step": 553
    },
    {
      "epoch": 0.09349027549255369,
      "grad_norm": 0.0015809914330020547,
      "learning_rate": 0.00013799236987142028,
      "loss": 46.0,
      "step": 554
    },
    {
      "epoch": 0.09365903050246804,
      "grad_norm": 0.0007688051555305719,
      "learning_rate": 0.00013779144916577717,
      "loss": 46.0,
      "step": 555
    },
    {
      "epoch": 0.0938277855123824,
      "grad_norm": 0.0016322305891662836,
      "learning_rate": 0.00013759035032234474,
      "loss": 46.0,
      "step": 556
    },
    {
      "epoch": 0.09399654052229675,
      "grad_norm": 0.0008620574371889234,
      "learning_rate": 0.00013738907428904388,
      "loss": 46.0,
      "step": 557
    },
    {
      "epoch": 0.09416529553221112,
      "grad_norm": 0.0013546262634918094,
      "learning_rate": 0.00013718762201463068,
      "loss": 46.0,
      "step": 558
    },
    {
      "epoch": 0.09433405054212547,
      "grad_norm": 0.0010162482503801584,
      "learning_rate": 0.000136985994448692,
      "loss": 46.0,
      "step": 559
    },
    {
      "epoch": 0.09450280555203983,
      "grad_norm": 0.0009128018864430487,
      "learning_rate": 0.00013678419254164084,
      "loss": 46.0,
      "step": 560
    },
    {
      "epoch": 0.09467156056195418,
      "grad_norm": 0.001009635510854423,
      "learning_rate": 0.00013658221724471227,
      "loss": 46.0,
      "step": 561
    },
    {
      "epoch": 0.09484031557186853,
      "grad_norm": 0.001470302464440465,
      "learning_rate": 0.0001363800695099584,
      "loss": 46.0,
      "step": 562
    },
    {
      "epoch": 0.0950090705817829,
      "grad_norm": 0.0010980793740600348,
      "learning_rate": 0.0001361777502902443,
      "loss": 46.0,
      "step": 563
    },
    {
      "epoch": 0.09517782559169725,
      "grad_norm": 0.0007694049854762852,
      "learning_rate": 0.00013597526053924335,
      "loss": 46.0,
      "step": 564
    },
    {
      "epoch": 0.09534658060161161,
      "grad_norm": 0.0008336540777236223,
      "learning_rate": 0.00013577260121143268,
      "loss": 46.0,
      "step": 565
    },
    {
      "epoch": 0.09551533561152596,
      "grad_norm": 0.0015536536229774356,
      "learning_rate": 0.0001355697732620889,
      "loss": 46.0,
      "step": 566
    },
    {
      "epoch": 0.09568409062144033,
      "grad_norm": 0.001046131830662489,
      "learning_rate": 0.00013536677764728336,
      "loss": 46.0,
      "step": 567
    },
    {
      "epoch": 0.09585284563135468,
      "grad_norm": 0.0007584646809846163,
      "learning_rate": 0.00013516361532387773,
      "loss": 46.0,
      "step": 568
    },
    {
      "epoch": 0.09602160064126904,
      "grad_norm": 0.0009031200897879899,
      "learning_rate": 0.00013496028724951958,
      "loss": 46.0,
      "step": 569
    },
    {
      "epoch": 0.09619035565118339,
      "grad_norm": 0.00145262002479285,
      "learning_rate": 0.00013475679438263764,
      "loss": 46.0,
      "step": 570
    },
    {
      "epoch": 0.09635911066109776,
      "grad_norm": 0.0011636154958978295,
      "learning_rate": 0.00013455313768243756,
      "loss": 46.0,
      "step": 571
    },
    {
      "epoch": 0.09652786567101211,
      "grad_norm": 0.0007297865231521428,
      "learning_rate": 0.00013434931810889719,
      "loss": 46.0,
      "step": 572
    },
    {
      "epoch": 0.09669662068092647,
      "grad_norm": 0.0012997461017221212,
      "learning_rate": 0.00013414533662276207,
      "loss": 46.0,
      "step": 573
    },
    {
      "epoch": 0.09686537569084082,
      "grad_norm": 0.0010920744389295578,
      "learning_rate": 0.00013394119418554107,
      "loss": 46.0,
      "step": 574
    },
    {
      "epoch": 0.09703413070075517,
      "grad_norm": 0.0009875032119452953,
      "learning_rate": 0.00013373689175950163,
      "loss": 46.0,
      "step": 575
    },
    {
      "epoch": 0.09720288571066954,
      "grad_norm": 0.0005962883587926626,
      "learning_rate": 0.0001335324303076654,
      "loss": 46.0,
      "step": 576
    },
    {
      "epoch": 0.0973716407205839,
      "grad_norm": 0.0010888517135754228,
      "learning_rate": 0.00013332781079380358,
      "loss": 46.0,
      "step": 577
    },
    {
      "epoch": 0.09754039573049825,
      "grad_norm": 0.0007129855803214014,
      "learning_rate": 0.0001331230341824324,
      "loss": 46.0,
      "step": 578
    },
    {
      "epoch": 0.0977091507404126,
      "grad_norm": 0.000518136948812753,
      "learning_rate": 0.0001329181014388087,
      "loss": 46.0,
      "step": 579
    },
    {
      "epoch": 0.09787790575032697,
      "grad_norm": 0.0012831081403419375,
      "learning_rate": 0.00013271301352892528,
      "loss": 46.0,
      "step": 580
    },
    {
      "epoch": 0.09804666076024132,
      "grad_norm": 0.0007919691852293909,
      "learning_rate": 0.00013250777141950618,
      "loss": 46.0,
      "step": 581
    },
    {
      "epoch": 0.09821541577015568,
      "grad_norm": 0.001164072658866644,
      "learning_rate": 0.0001323023760780025,
      "loss": 46.0,
      "step": 582
    },
    {
      "epoch": 0.09838417078007003,
      "grad_norm": 0.0008439416997134686,
      "learning_rate": 0.00013209682847258752,
      "loss": 46.0,
      "step": 583
    },
    {
      "epoch": 0.0985529257899844,
      "grad_norm": 0.0015564275672659278,
      "learning_rate": 0.00013189112957215227,
      "loss": 46.0,
      "step": 584
    },
    {
      "epoch": 0.09872168079989875,
      "grad_norm": 0.001126340590417385,
      "learning_rate": 0.0001316852803463009,
      "loss": 46.0,
      "step": 585
    },
    {
      "epoch": 0.0988904358098131,
      "grad_norm": 0.00132976402528584,
      "learning_rate": 0.0001314792817653462,
      "loss": 46.0,
      "step": 586
    },
    {
      "epoch": 0.09905919081972746,
      "grad_norm": 0.0006973096751607955,
      "learning_rate": 0.000131273134800305,
      "loss": 46.0,
      "step": 587
    },
    {
      "epoch": 0.09922794582964181,
      "grad_norm": 0.0008691195980645716,
      "learning_rate": 0.00013106684042289352,
      "loss": 46.0,
      "step": 588
    },
    {
      "epoch": 0.09939670083955618,
      "grad_norm": 0.0007818661979399621,
      "learning_rate": 0.00013086039960552283,
      "loss": 46.0,
      "step": 589
    },
    {
      "epoch": 0.09956545584947053,
      "grad_norm": 0.0013533838791772723,
      "learning_rate": 0.00013065381332129434,
      "loss": 46.0,
      "step": 590
    },
    {
      "epoch": 0.09973421085938489,
      "grad_norm": 0.0008758413605391979,
      "learning_rate": 0.00013044708254399503,
      "loss": 46.0,
      "step": 591
    },
    {
      "epoch": 0.09990296586929924,
      "grad_norm": 0.001231315778568387,
      "learning_rate": 0.00013024020824809317,
      "loss": 46.0,
      "step": 592
    },
    {
      "epoch": 0.10007172087921361,
      "grad_norm": 0.0004609159368555993,
      "learning_rate": 0.00013003319140873328,
      "loss": 46.0,
      "step": 593
    },
    {
      "epoch": 0.10024047588912796,
      "grad_norm": 0.0009504602057859302,
      "learning_rate": 0.000129826033001732,
      "loss": 46.0,
      "step": 594
    },
    {
      "epoch": 0.10040923089904231,
      "grad_norm": 0.001184243243187666,
      "learning_rate": 0.0001296187340035732,
      "loss": 46.0,
      "step": 595
    },
    {
      "epoch": 0.10057798590895667,
      "grad_norm": 0.0015000102575868368,
      "learning_rate": 0.00012941129539140346,
      "loss": 46.0,
      "step": 596
    },
    {
      "epoch": 0.10074674091887102,
      "grad_norm": 0.0008899506065063179,
      "learning_rate": 0.00012920371814302743,
      "loss": 46.0,
      "step": 597
    },
    {
      "epoch": 0.10091549592878539,
      "grad_norm": 0.0013121910160407424,
      "learning_rate": 0.00012899600323690332,
      "loss": 46.0,
      "step": 598
    },
    {
      "epoch": 0.10108425093869974,
      "grad_norm": 0.0006298114312812686,
      "learning_rate": 0.00012878815165213813,
      "loss": 46.0,
      "step": 599
    },
    {
      "epoch": 0.1012530059486141,
      "grad_norm": 0.001439255429431796,
      "learning_rate": 0.00012858016436848323,
      "loss": 46.0,
      "step": 600
    },
    {
      "epoch": 0.10142176095852845,
      "grad_norm": 0.001775096170604229,
      "learning_rate": 0.00012837204236632952,
      "loss": 46.0,
      "step": 601
    },
    {
      "epoch": 0.10159051596844282,
      "grad_norm": 0.0008805092074908316,
      "learning_rate": 0.000128163786626703,
      "loss": 46.0,
      "step": 602
    },
    {
      "epoch": 0.10175927097835717,
      "grad_norm": 0.0007393642445094883,
      "learning_rate": 0.00012795539813126005,
      "loss": 46.0,
      "step": 603
    },
    {
      "epoch": 0.10192802598827153,
      "grad_norm": 0.0013515629107132554,
      "learning_rate": 0.0001277468778622828,
      "loss": 46.0,
      "step": 604
    },
    {
      "epoch": 0.10209678099818588,
      "grad_norm": 0.0010095036122947931,
      "learning_rate": 0.00012753822680267458,
      "loss": 46.0,
      "step": 605
    },
    {
      "epoch": 0.10226553600810025,
      "grad_norm": 0.001216367818415165,
      "learning_rate": 0.00012732944593595515,
      "loss": 46.0,
      "step": 606
    },
    {
      "epoch": 0.1024342910180146,
      "grad_norm": 0.0010075365426018834,
      "learning_rate": 0.00012712053624625618,
      "loss": 46.0,
      "step": 607
    },
    {
      "epoch": 0.10260304602792895,
      "grad_norm": 0.00094554980751127,
      "learning_rate": 0.0001269114987183166,
      "loss": 46.0,
      "step": 608
    },
    {
      "epoch": 0.10277180103784331,
      "grad_norm": 0.0011870136950165033,
      "learning_rate": 0.0001267023343374779,
      "loss": 46.0,
      "step": 609
    },
    {
      "epoch": 0.10294055604775766,
      "grad_norm": 0.00031608124845661223,
      "learning_rate": 0.00012649304408967954,
      "loss": 46.0,
      "step": 610
    },
    {
      "epoch": 0.10310931105767203,
      "grad_norm": 0.0017526588635519147,
      "learning_rate": 0.00012628362896145422,
      "loss": 46.0,
      "step": 611
    },
    {
      "epoch": 0.10327806606758638,
      "grad_norm": 0.0016203138511627913,
      "learning_rate": 0.00012607408993992335,
      "loss": 46.0,
      "step": 612
    },
    {
      "epoch": 0.10344682107750074,
      "grad_norm": 0.0017116809030994773,
      "learning_rate": 0.0001258644280127924,
      "loss": 46.0,
      "step": 613
    },
    {
      "epoch": 0.10361557608741509,
      "grad_norm": 0.0010332480305805802,
      "learning_rate": 0.00012565464416834597,
      "loss": 46.0,
      "step": 614
    },
    {
      "epoch": 0.10378433109732946,
      "grad_norm": 0.0016215546056628227,
      "learning_rate": 0.00012544473939544355,
      "loss": 46.0,
      "step": 615
    },
    {
      "epoch": 0.10395308610724381,
      "grad_norm": 0.0011419228976592422,
      "learning_rate": 0.00012523471468351457,
      "loss": 46.0,
      "step": 616
    },
    {
      "epoch": 0.10412184111715816,
      "grad_norm": 0.001963686663657427,
      "learning_rate": 0.00012502457102255381,
      "loss": 46.0,
      "step": 617
    },
    {
      "epoch": 0.10429059612707252,
      "grad_norm": 0.0018488739151507616,
      "learning_rate": 0.0001248143094031168,
      "loss": 46.0,
      "step": 618
    },
    {
      "epoch": 0.10445935113698689,
      "grad_norm": 0.0009021925507113338,
      "learning_rate": 0.00012460393081631494,
      "loss": 46.0,
      "step": 619
    },
    {
      "epoch": 0.10462810614690124,
      "grad_norm": 0.000811565259937197,
      "learning_rate": 0.00012439343625381117,
      "loss": 46.0,
      "step": 620
    },
    {
      "epoch": 0.1047968611568156,
      "grad_norm": 0.0009416831890121102,
      "learning_rate": 0.00012418282670781502,
      "loss": 46.0,
      "step": 621
    },
    {
      "epoch": 0.10496561616672995,
      "grad_norm": 0.0011268195230513811,
      "learning_rate": 0.000123972103171078,
      "loss": 46.0,
      "step": 622
    },
    {
      "epoch": 0.1051343711766443,
      "grad_norm": 0.0008039239328354597,
      "learning_rate": 0.00012376126663688893,
      "loss": 46.0,
      "step": 623
    },
    {
      "epoch": 0.10530312618655867,
      "grad_norm": 0.001955381128937006,
      "learning_rate": 0.00012355031809906935,
      "loss": 46.0,
      "step": 624
    },
    {
      "epoch": 0.10547188119647302,
      "grad_norm": 0.0006566803785972297,
      "learning_rate": 0.00012333925855196863,
      "loss": 46.0,
      "step": 625
    },
    {
      "epoch": 0.10564063620638738,
      "grad_norm": 0.0007125309784896672,
      "learning_rate": 0.00012312808899045955,
      "loss": 46.0,
      "step": 626
    },
    {
      "epoch": 0.10580939121630173,
      "grad_norm": 0.0008565658936277032,
      "learning_rate": 0.00012291681040993332,
      "loss": 46.0,
      "step": 627
    },
    {
      "epoch": 0.1059781462262161,
      "grad_norm": 0.0012913001701235771,
      "learning_rate": 0.00012270542380629515,
      "loss": 46.0,
      "step": 628
    },
    {
      "epoch": 0.10614690123613045,
      "grad_norm": 0.0005761163774877787,
      "learning_rate": 0.00012249393017595936,
      "loss": 46.0,
      "step": 629
    },
    {
      "epoch": 0.1063156562460448,
      "grad_norm": 0.0014059852110221982,
      "learning_rate": 0.0001222823305158448,
      "loss": 46.0,
      "step": 630
    },
    {
      "epoch": 0.10648441125595916,
      "grad_norm": 0.0009610773995518684,
      "learning_rate": 0.0001220706258233701,
      "loss": 46.0,
      "step": 631
    },
    {
      "epoch": 0.10665316626587351,
      "grad_norm": 0.00141863152384758,
      "learning_rate": 0.000121858817096449,
      "loss": 46.0,
      "step": 632
    },
    {
      "epoch": 0.10682192127578788,
      "grad_norm": 0.0008159265271387994,
      "learning_rate": 0.00012164690533348557,
      "loss": 46.0,
      "step": 633
    },
    {
      "epoch": 0.10699067628570223,
      "grad_norm": 0.0004636669473256916,
      "learning_rate": 0.00012143489153336972,
      "loss": 46.0,
      "step": 634
    },
    {
      "epoch": 0.10715943129561659,
      "grad_norm": 0.002586417132988572,
      "learning_rate": 0.00012122277669547208,
      "loss": 46.0,
      "step": 635
    },
    {
      "epoch": 0.10732818630553094,
      "grad_norm": 0.0007418668828904629,
      "learning_rate": 0.00012101056181963977,
      "loss": 46.0,
      "step": 636
    },
    {
      "epoch": 0.10749694131544531,
      "grad_norm": 0.0007878371980041265,
      "learning_rate": 0.00012079824790619136,
      "loss": 46.0,
      "step": 637
    },
    {
      "epoch": 0.10766569632535966,
      "grad_norm": 0.0008747411775402725,
      "learning_rate": 0.00012058583595591227,
      "loss": 46.0,
      "step": 638
    },
    {
      "epoch": 0.10783445133527401,
      "grad_norm": 0.0004629637114703655,
      "learning_rate": 0.00012037332697004999,
      "loss": 46.0,
      "step": 639
    },
    {
      "epoch": 0.10800320634518837,
      "grad_norm": 0.0013667724560946226,
      "learning_rate": 0.0001201607219503095,
      "loss": 46.0,
      "step": 640
    },
    {
      "epoch": 0.10817196135510274,
      "grad_norm": 0.000639776058960706,
      "learning_rate": 0.00011994802189884833,
      "loss": 46.0,
      "step": 641
    },
    {
      "epoch": 0.10834071636501709,
      "grad_norm": 0.002615303033962846,
      "learning_rate": 0.00011973522781827217,
      "loss": 46.0,
      "step": 642
    },
    {
      "epoch": 0.10850947137493144,
      "grad_norm": 0.0006569072720594704,
      "learning_rate": 0.00011952234071162967,
      "loss": 46.0,
      "step": 643
    },
    {
      "epoch": 0.1086782263848458,
      "grad_norm": 0.0020576249808073044,
      "learning_rate": 0.0001193093615824082,
      "loss": 46.0,
      "step": 644
    },
    {
      "epoch": 0.10884698139476015,
      "grad_norm": 0.0006775553338229656,
      "learning_rate": 0.00011909629143452876,
      "loss": 46.0,
      "step": 645
    },
    {
      "epoch": 0.10901573640467452,
      "grad_norm": 0.0010402743937447667,
      "learning_rate": 0.00011888313127234145,
      "loss": 46.0,
      "step": 646
    },
    {
      "epoch": 0.10918449141458887,
      "grad_norm": 0.0009751408942975104,
      "learning_rate": 0.00011866988210062064,
      "loss": 46.0,
      "step": 647
    },
    {
      "epoch": 0.10935324642450323,
      "grad_norm": 0.0006023211753927171,
      "learning_rate": 0.00011845654492456031,
      "loss": 46.0,
      "step": 648
    },
    {
      "epoch": 0.10952200143441758,
      "grad_norm": 0.002933788113296032,
      "learning_rate": 0.00011824312074976919,
      "loss": 46.0,
      "step": 649
    },
    {
      "epoch": 0.10969075644433195,
      "grad_norm": 0.00047651128261350095,
      "learning_rate": 0.0001180296105822662,
      "loss": 46.0,
      "step": 650
    },
    {
      "epoch": 0.1098595114542463,
      "grad_norm": 0.0009603831567801535,
      "learning_rate": 0.00011781601542847548,
      "loss": 46.0,
      "step": 651
    },
    {
      "epoch": 0.11002826646416065,
      "grad_norm": 0.0006936375284567475,
      "learning_rate": 0.0001176023362952219,
      "loss": 46.0,
      "step": 652
    },
    {
      "epoch": 0.11019702147407501,
      "grad_norm": 0.0005456854123622179,
      "learning_rate": 0.00011738857418972609,
      "loss": 46.0,
      "step": 653
    },
    {
      "epoch": 0.11036577648398938,
      "grad_norm": 0.0012621437199413776,
      "learning_rate": 0.00011717473011959979,
      "loss": 46.0,
      "step": 654
    },
    {
      "epoch": 0.11053453149390373,
      "grad_norm": 0.0023074436467140913,
      "learning_rate": 0.0001169608050928412,
      "loss": 46.0,
      "step": 655
    },
    {
      "epoch": 0.11070328650381808,
      "grad_norm": 0.0011624132748693228,
      "learning_rate": 0.00011674680011782997,
      "loss": 46.0,
      "step": 656
    },
    {
      "epoch": 0.11087204151373244,
      "grad_norm": 0.0004293357487767935,
      "learning_rate": 0.00011653271620332274,
      "loss": 46.0,
      "step": 657
    },
    {
      "epoch": 0.11104079652364679,
      "grad_norm": 0.002472059801220894,
      "learning_rate": 0.00011631855435844816,
      "loss": 46.0,
      "step": 658
    },
    {
      "epoch": 0.11120955153356116,
      "grad_norm": 0.0008103114669211209,
      "learning_rate": 0.00011610431559270228,
      "loss": 46.0,
      "step": 659
    },
    {
      "epoch": 0.11137830654347551,
      "grad_norm": 0.0008554041851311922,
      "learning_rate": 0.00011589000091594369,
      "loss": 46.0,
      "step": 660
    },
    {
      "epoch": 0.11154706155338986,
      "grad_norm": 0.0012236748589202762,
      "learning_rate": 0.00011567561133838877,
      "loss": 46.0,
      "step": 661
    },
    {
      "epoch": 0.11171581656330422,
      "grad_norm": 0.0010636880761012435,
      "learning_rate": 0.00011546114787060709,
      "loss": 46.0,
      "step": 662
    },
    {
      "epoch": 0.11188457157321859,
      "grad_norm": 0.001079093781299889,
      "learning_rate": 0.00011524661152351641,
      "loss": 46.0,
      "step": 663
    },
    {
      "epoch": 0.11205332658313294,
      "grad_norm": 0.0013255566591396928,
      "learning_rate": 0.000115032003308378,
      "loss": 46.0,
      "step": 664
    },
    {
      "epoch": 0.1122220815930473,
      "grad_norm": 0.0007876198505982757,
      "learning_rate": 0.00011481732423679197,
      "loss": 46.0,
      "step": 665
    },
    {
      "epoch": 0.11239083660296165,
      "grad_norm": 0.0007665604935027659,
      "learning_rate": 0.00011460257532069241,
      "loss": 46.0,
      "step": 666
    },
    {
      "epoch": 0.112559591612876,
      "grad_norm": 0.000991009990684688,
      "learning_rate": 0.00011438775757234261,
      "loss": 46.0,
      "step": 667
    },
    {
      "epoch": 0.11272834662279037,
      "grad_norm": 0.0008777152397669852,
      "learning_rate": 0.00011417287200433033,
      "loss": 46.0,
      "step": 668
    },
    {
      "epoch": 0.11289710163270472,
      "grad_norm": 0.0004085569526068866,
      "learning_rate": 0.00011395791962956299,
      "loss": 46.0,
      "step": 669
    },
    {
      "epoch": 0.11306585664261908,
      "grad_norm": 0.0023701228201389313,
      "learning_rate": 0.00011374290146126299,
      "loss": 46.0,
      "step": 670
    },
    {
      "epoch": 0.11323461165253343,
      "grad_norm": 0.0006865113973617554,
      "learning_rate": 0.00011352781851296277,
      "loss": 46.0,
      "step": 671
    },
    {
      "epoch": 0.1134033666624478,
      "grad_norm": 0.0007230490446090698,
      "learning_rate": 0.00011331267179850016,
      "loss": 46.0,
      "step": 672
    },
    {
      "epoch": 0.11357212167236215,
      "grad_norm": 0.0006796296802349389,
      "learning_rate": 0.00011309746233201357,
      "loss": 46.0,
      "step": 673
    },
    {
      "epoch": 0.1137408766822765,
      "grad_norm": 0.0009500381420366466,
      "learning_rate": 0.00011288219112793722,
      "loss": 46.0,
      "step": 674
    },
    {
      "epoch": 0.11390963169219086,
      "grad_norm": 0.0005749015836045146,
      "learning_rate": 0.00011266685920099632,
      "loss": 46.0,
      "step": 675
    },
    {
      "epoch": 0.11407838670210523,
      "grad_norm": 0.0008176440605893731,
      "learning_rate": 0.00011245146756620233,
      "loss": 46.0,
      "step": 676
    },
    {
      "epoch": 0.11424714171201958,
      "grad_norm": 0.0012956701684743166,
      "learning_rate": 0.00011223601723884807,
      "loss": 46.0,
      "step": 677
    },
    {
      "epoch": 0.11441589672193393,
      "grad_norm": 0.0012071337550878525,
      "learning_rate": 0.00011202050923450317,
      "loss": 46.0,
      "step": 678
    },
    {
      "epoch": 0.11458465173184829,
      "grad_norm": 0.0008238774607889354,
      "learning_rate": 0.00011180494456900903,
      "loss": 46.0,
      "step": 679
    },
    {
      "epoch": 0.11475340674176264,
      "grad_norm": 0.000518804183229804,
      "learning_rate": 0.00011158932425847415,
      "loss": 46.0,
      "step": 680
    },
    {
      "epoch": 0.11492216175167701,
      "grad_norm": 0.0009683236130513251,
      "learning_rate": 0.00011137364931926932,
      "loss": 46.0,
      "step": 681
    },
    {
      "epoch": 0.11509091676159136,
      "grad_norm": 0.0006015019025653601,
      "learning_rate": 0.00011115792076802286,
      "loss": 46.0,
      "step": 682
    },
    {
      "epoch": 0.11525967177150571,
      "grad_norm": 0.0009642652003094554,
      "learning_rate": 0.00011094213962161576,
      "loss": 46.0,
      "step": 683
    },
    {
      "epoch": 0.11542842678142007,
      "grad_norm": 0.0013890763511881232,
      "learning_rate": 0.000110726306897177,
      "loss": 46.0,
      "step": 684
    },
    {
      "epoch": 0.11559718179133444,
      "grad_norm": 0.0007178309024311602,
      "learning_rate": 0.00011051042361207861,
      "loss": 46.0,
      "step": 685
    },
    {
      "epoch": 0.11576593680124879,
      "grad_norm": 0.00312964734621346,
      "learning_rate": 0.00011029449078393098,
      "loss": 46.0,
      "step": 686
    },
    {
      "epoch": 0.11593469181116314,
      "grad_norm": 0.0011070105247199535,
      "learning_rate": 0.000110078509430578,
      "loss": 46.0,
      "step": 687
    },
    {
      "epoch": 0.1161034468210775,
      "grad_norm": 0.002353600226342678,
      "learning_rate": 0.00010986248057009239,
      "loss": 46.0,
      "step": 688
    },
    {
      "epoch": 0.11627220183099186,
      "grad_norm": 0.0012018800480291247,
      "learning_rate": 0.00010964640522077065,
      "loss": 46.0,
      "step": 689
    },
    {
      "epoch": 0.11644095684090622,
      "grad_norm": 0.0008240241440944374,
      "learning_rate": 0.00010943028440112854,
      "loss": 46.0,
      "step": 690
    },
    {
      "epoch": 0.11660971185082057,
      "grad_norm": 0.000862058368511498,
      "learning_rate": 0.00010921411912989615,
      "loss": 46.0,
      "step": 691
    },
    {
      "epoch": 0.11677846686073493,
      "grad_norm": 0.0008431962342001498,
      "learning_rate": 0.000108997910426013,
      "loss": 46.0,
      "step": 692
    },
    {
      "epoch": 0.11694722187064928,
      "grad_norm": 0.0010532697197049856,
      "learning_rate": 0.00010878165930862343,
      "loss": 46.0,
      "step": 693
    },
    {
      "epoch": 0.11711597688056365,
      "grad_norm": 0.0008627737988717854,
      "learning_rate": 0.0001085653667970717,
      "loss": 46.0,
      "step": 694
    },
    {
      "epoch": 0.117284731890478,
      "grad_norm": 0.0010250735795125365,
      "learning_rate": 0.00010834903391089712,
      "loss": 46.0,
      "step": 695
    },
    {
      "epoch": 0.11745348690039235,
      "grad_norm": 0.0015792236663401127,
      "learning_rate": 0.00010813266166982946,
      "loss": 46.0,
      "step": 696
    },
    {
      "epoch": 0.11762224191030671,
      "grad_norm": 0.0014957215171307325,
      "learning_rate": 0.00010791625109378376,
      "loss": 46.0,
      "step": 697
    },
    {
      "epoch": 0.11779099692022107,
      "grad_norm": 0.0006147067178972065,
      "learning_rate": 0.00010769980320285599,
      "loss": 46.0,
      "step": 698
    },
    {
      "epoch": 0.11795975193013543,
      "grad_norm": 0.0004508892016019672,
      "learning_rate": 0.00010748331901731793,
      "loss": 46.0,
      "step": 699
    },
    {
      "epoch": 0.11812850694004978,
      "grad_norm": 0.0005747199174948037,
      "learning_rate": 0.00010726679955761238,
      "loss": 46.0,
      "step": 700
    },
    {
      "epoch": 0.11829726194996414,
      "grad_norm": 0.0018676796462386847,
      "learning_rate": 0.00010705024584434853,
      "loss": 46.0,
      "step": 701
    },
    {
      "epoch": 0.11846601695987849,
      "grad_norm": 0.0008204419864341617,
      "learning_rate": 0.00010683365889829692,
      "loss": 46.0,
      "step": 702
    },
    {
      "epoch": 0.11863477196979286,
      "grad_norm": 0.0007434524013660848,
      "learning_rate": 0.0001066170397403848,
      "loss": 46.0,
      "step": 703
    },
    {
      "epoch": 0.11880352697970721,
      "grad_norm": 0.0006690184236504138,
      "learning_rate": 0.00010640038939169124,
      "loss": 46.0,
      "step": 704
    },
    {
      "epoch": 0.11897228198962156,
      "grad_norm": 0.0015682928496971726,
      "learning_rate": 0.00010618370887344234,
      "loss": 46.0,
      "step": 705
    },
    {
      "epoch": 0.11914103699953592,
      "grad_norm": 0.000833112804684788,
      "learning_rate": 0.0001059669992070064,
      "loss": 46.0,
      "step": 706
    },
    {
      "epoch": 0.11930979200945029,
      "grad_norm": 0.0005185617483220994,
      "learning_rate": 0.00010575026141388915,
      "loss": 46.0,
      "step": 707
    },
    {
      "epoch": 0.11947854701936464,
      "grad_norm": 0.001206497778184712,
      "learning_rate": 0.00010553349651572881,
      "loss": 46.0,
      "step": 708
    },
    {
      "epoch": 0.11964730202927899,
      "grad_norm": 0.0009608225082047284,
      "learning_rate": 0.00010531670553429156,
      "loss": 46.0,
      "step": 709
    },
    {
      "epoch": 0.11981605703919335,
      "grad_norm": 0.0008177707786671817,
      "learning_rate": 0.00010509988949146627,
      "loss": 46.0,
      "step": 710
    },
    {
      "epoch": 0.11998481204910771,
      "grad_norm": 0.0021591701079159975,
      "learning_rate": 0.00010488304940926012,
      "loss": 46.0,
      "step": 711
    },
    {
      "epoch": 0.12015356705902207,
      "grad_norm": 0.002416484523564577,
      "learning_rate": 0.00010466618630979357,
      "loss": 46.0,
      "step": 712
    },
    {
      "epoch": 0.12032232206893642,
      "grad_norm": 0.0010555331828072667,
      "learning_rate": 0.00010444930121529555,
      "loss": 46.0,
      "step": 713
    },
    {
      "epoch": 0.12049107707885078,
      "grad_norm": 0.00034197320928797126,
      "learning_rate": 0.00010423239514809866,
      "loss": 46.0,
      "step": 714
    },
    {
      "epoch": 0.12065983208876513,
      "grad_norm": 0.0006457001436501741,
      "learning_rate": 0.00010401546913063442,
      "loss": 46.0,
      "step": 715
    },
    {
      "epoch": 0.1208285870986795,
      "grad_norm": 0.0006262487731873989,
      "learning_rate": 0.0001037985241854283,
      "loss": 46.0,
      "step": 716
    },
    {
      "epoch": 0.12099734210859385,
      "grad_norm": 0.0030058289412409067,
      "learning_rate": 0.00010358156133509513,
      "loss": 46.0,
      "step": 717
    },
    {
      "epoch": 0.1211660971185082,
      "grad_norm": 0.0007620776887051761,
      "learning_rate": 0.00010336458160233393,
      "loss": 46.0,
      "step": 718
    },
    {
      "epoch": 0.12133485212842256,
      "grad_norm": 0.0007673231884837151,
      "learning_rate": 0.0001031475860099235,
      "loss": 46.0,
      "step": 719
    },
    {
      "epoch": 0.12150360713833692,
      "grad_norm": 0.0008117277757264674,
      "learning_rate": 0.00010293057558071728,
      "loss": 46.0,
      "step": 720
    },
    {
      "epoch": 0.12167236214825128,
      "grad_norm": 0.0005056714289821684,
      "learning_rate": 0.00010271355133763869,
      "loss": 46.0,
      "step": 721
    },
    {
      "epoch": 0.12184111715816563,
      "grad_norm": 0.0008250874234363437,
      "learning_rate": 0.00010249651430367628,
      "loss": 46.0,
      "step": 722
    },
    {
      "epoch": 0.12200987216807999,
      "grad_norm": 0.0010816901922225952,
      "learning_rate": 0.00010227946550187884,
      "loss": 46.0,
      "step": 723
    },
    {
      "epoch": 0.12217862717799435,
      "grad_norm": 0.0008268383098766208,
      "learning_rate": 0.00010206240595535063,
      "loss": 46.0,
      "step": 724
    },
    {
      "epoch": 0.12234738218790871,
      "grad_norm": 0.0011831193696707487,
      "learning_rate": 0.00010184533668724667,
      "loss": 46.0,
      "step": 725
    },
    {
      "epoch": 0.12251613719782306,
      "grad_norm": 0.003153022611513734,
      "learning_rate": 0.00010162825872076767,
      "loss": 46.0,
      "step": 726
    },
    {
      "epoch": 0.12268489220773741,
      "grad_norm": 0.00038787827361375093,
      "learning_rate": 0.00010141117307915537,
      "loss": 46.0,
      "step": 727
    },
    {
      "epoch": 0.12285364721765177,
      "grad_norm": 0.0009175827144645154,
      "learning_rate": 0.00010119408078568774,
      "loss": 46.0,
      "step": 728
    },
    {
      "epoch": 0.12302240222756614,
      "grad_norm": 0.0005741899949498475,
      "learning_rate": 0.00010097698286367409,
      "loss": 46.0,
      "step": 729
    },
    {
      "epoch": 0.12319115723748049,
      "grad_norm": 0.0005662553012371063,
      "learning_rate": 0.00010075988033645025,
      "loss": 46.0,
      "step": 730
    },
    {
      "epoch": 0.12319115723748049,
      "eval_loss": 11.5,
      "eval_runtime": 14.1785,
      "eval_samples_per_second": 175.97,
      "eval_steps_per_second": 88.021,
      "step": 730
    },
    {
      "epoch": 0.12335991224739484,
      "grad_norm": 0.0005169582436792552,
      "learning_rate": 0.00010054277422737369,
      "loss": 46.0,
      "step": 731
    },
    {
      "epoch": 0.1235286672573092,
      "grad_norm": 0.00028787710471078753,
      "learning_rate": 0.0001003256655598189,
      "loss": 46.0,
      "step": 732
    },
    {
      "epoch": 0.12369742226722356,
      "grad_norm": 0.0016796085983514786,
      "learning_rate": 0.00010010855535717232,
      "loss": 46.0,
      "step": 733
    },
    {
      "epoch": 0.12386617727713792,
      "grad_norm": 0.0016076072352007031,
      "learning_rate": 9.98914446428277e-05,
      "loss": 46.0,
      "step": 734
    },
    {
      "epoch": 0.12403493228705227,
      "grad_norm": 0.0008219809387810528,
      "learning_rate": 9.967433444018111e-05,
      "loss": 46.0,
      "step": 735
    },
    {
      "epoch": 0.12420368729696663,
      "grad_norm": 0.002049337374046445,
      "learning_rate": 9.945722577262632e-05,
      "loss": 46.0,
      "step": 736
    },
    {
      "epoch": 0.12437244230688098,
      "grad_norm": 0.0005754379089921713,
      "learning_rate": 9.92401196635498e-05,
      "loss": 46.0,
      "step": 737
    },
    {
      "epoch": 0.12454119731679535,
      "grad_norm": 0.0013358064461499453,
      "learning_rate": 9.902301713632592e-05,
      "loss": 46.0,
      "step": 738
    },
    {
      "epoch": 0.1247099523267097,
      "grad_norm": 0.0006682523526251316,
      "learning_rate": 9.880591921431227e-05,
      "loss": 46.0,
      "step": 739
    },
    {
      "epoch": 0.12487870733662405,
      "grad_norm": 0.00043337634997442365,
      "learning_rate": 9.858882692084466e-05,
      "loss": 46.0,
      "step": 740
    },
    {
      "epoch": 0.1250474623465384,
      "grad_norm": 0.0005919807590544224,
      "learning_rate": 9.837174127923237e-05,
      "loss": 46.0,
      "step": 741
    },
    {
      "epoch": 0.12521621735645277,
      "grad_norm": 0.0013382198521867394,
      "learning_rate": 9.815466331275335e-05,
      "loss": 46.0,
      "step": 742
    },
    {
      "epoch": 0.12538497236636711,
      "grad_norm": 0.000782837625592947,
      "learning_rate": 9.793759404464936e-05,
      "loss": 46.0,
      "step": 743
    },
    {
      "epoch": 0.12555372737628148,
      "grad_norm": 0.0009372693602927029,
      "learning_rate": 9.772053449812118e-05,
      "loss": 46.0,
      "step": 744
    },
    {
      "epoch": 0.12572248238619585,
      "grad_norm": 0.000794100807979703,
      "learning_rate": 9.750348569632375e-05,
      "loss": 46.0,
      "step": 745
    },
    {
      "epoch": 0.1258912373961102,
      "grad_norm": 0.0010177150834351778,
      "learning_rate": 9.728644866236132e-05,
      "loss": 46.0,
      "step": 746
    },
    {
      "epoch": 0.12605999240602456,
      "grad_norm": 0.0010124148102477193,
      "learning_rate": 9.706942441928273e-05,
      "loss": 46.0,
      "step": 747
    },
    {
      "epoch": 0.12622874741593892,
      "grad_norm": 0.0010855060536414385,
      "learning_rate": 9.68524139900765e-05,
      "loss": 46.0,
      "step": 748
    },
    {
      "epoch": 0.12639750242585326,
      "grad_norm": 0.0008628432988189161,
      "learning_rate": 9.66354183976661e-05,
      "loss": 46.0,
      "step": 749
    },
    {
      "epoch": 0.12656625743576763,
      "grad_norm": 0.0007373031694442034,
      "learning_rate": 9.641843866490492e-05,
      "loss": 46.0,
      "step": 750
    },
    {
      "epoch": 0.12673501244568197,
      "grad_norm": 0.0007096838089637458,
      "learning_rate": 9.62014758145717e-05,
      "loss": 46.0,
      "step": 751
    },
    {
      "epoch": 0.12690376745559634,
      "grad_norm": 0.0009518765727989376,
      "learning_rate": 9.598453086936559e-05,
      "loss": 46.0,
      "step": 752
    },
    {
      "epoch": 0.1270725224655107,
      "grad_norm": 0.0013600359670817852,
      "learning_rate": 9.576760485190137e-05,
      "loss": 46.0,
      "step": 753
    },
    {
      "epoch": 0.12724127747542505,
      "grad_norm": 0.0011481187539175153,
      "learning_rate": 9.555069878470449e-05,
      "loss": 46.0,
      "step": 754
    },
    {
      "epoch": 0.12741003248533941,
      "grad_norm": 0.000521646230481565,
      "learning_rate": 9.533381369020646e-05,
      "loss": 46.0,
      "step": 755
    },
    {
      "epoch": 0.12757878749525375,
      "grad_norm": 0.0005510274204425514,
      "learning_rate": 9.511695059073989e-05,
      "loss": 46.0,
      "step": 756
    },
    {
      "epoch": 0.12774754250516812,
      "grad_norm": 0.003858919721096754,
      "learning_rate": 9.490011050853375e-05,
      "loss": 46.0,
      "step": 757
    },
    {
      "epoch": 0.1279162975150825,
      "grad_norm": 0.0031276738736778498,
      "learning_rate": 9.468329446570848e-05,
      "loss": 46.0,
      "step": 758
    },
    {
      "epoch": 0.12808505252499683,
      "grad_norm": 0.0007159564993344247,
      "learning_rate": 9.446650348427117e-05,
      "loss": 46.0,
      "step": 759
    },
    {
      "epoch": 0.1282538075349112,
      "grad_norm": 0.0005575700779445469,
      "learning_rate": 9.424973858611088e-05,
      "loss": 46.0,
      "step": 760
    },
    {
      "epoch": 0.12842256254482554,
      "grad_norm": 0.00020999553089495748,
      "learning_rate": 9.403300079299364e-05,
      "loss": 46.0,
      "step": 761
    },
    {
      "epoch": 0.1285913175547399,
      "grad_norm": 0.0008280250476673245,
      "learning_rate": 9.38162911265577e-05,
      "loss": 46.0,
      "step": 762
    },
    {
      "epoch": 0.12876007256465427,
      "grad_norm": 0.0007614243077114224,
      "learning_rate": 9.35996106083088e-05,
      "loss": 46.0,
      "step": 763
    },
    {
      "epoch": 0.1289288275745686,
      "grad_norm": 0.0007390084792859852,
      "learning_rate": 9.338296025961521e-05,
      "loss": 46.0,
      "step": 764
    },
    {
      "epoch": 0.12909758258448298,
      "grad_norm": 0.0005527955945581198,
      "learning_rate": 9.31663411017031e-05,
      "loss": 46.0,
      "step": 765
    },
    {
      "epoch": 0.12926633759439735,
      "grad_norm": 0.0007797405123710632,
      "learning_rate": 9.294975415565151e-05,
      "loss": 46.0,
      "step": 766
    },
    {
      "epoch": 0.12943509260431169,
      "grad_norm": 0.0008938325918279588,
      "learning_rate": 9.273320044238763e-05,
      "loss": 46.0,
      "step": 767
    },
    {
      "epoch": 0.12960384761422605,
      "grad_norm": 0.0011889681918546557,
      "learning_rate": 9.251668098268209e-05,
      "loss": 46.0,
      "step": 768
    },
    {
      "epoch": 0.1297726026241404,
      "grad_norm": 0.0007662948337383568,
      "learning_rate": 9.230019679714405e-05,
      "loss": 46.0,
      "step": 769
    },
    {
      "epoch": 0.12994135763405476,
      "grad_norm": 0.0007776907877996564,
      "learning_rate": 9.208374890621628e-05,
      "loss": 46.0,
      "step": 770
    },
    {
      "epoch": 0.13011011264396913,
      "grad_norm": 0.0019411947578191757,
      "learning_rate": 9.186733833017061e-05,
      "loss": 46.0,
      "step": 771
    },
    {
      "epoch": 0.13027886765388347,
      "grad_norm": 0.0007572571048513055,
      "learning_rate": 9.165096608910287e-05,
      "loss": 46.0,
      "step": 772
    },
    {
      "epoch": 0.13044762266379784,
      "grad_norm": 0.0006218423368409276,
      "learning_rate": 9.143463320292832e-05,
      "loss": 46.0,
      "step": 773
    },
    {
      "epoch": 0.13061637767371218,
      "grad_norm": 0.0005072550848126411,
      "learning_rate": 9.12183406913766e-05,
      "loss": 46.0,
      "step": 774
    },
    {
      "epoch": 0.13078513268362654,
      "grad_norm": 0.0011237628059461713,
      "learning_rate": 9.100208957398703e-05,
      "loss": 46.0,
      "step": 775
    },
    {
      "epoch": 0.1309538876935409,
      "grad_norm": 0.0009162898059003055,
      "learning_rate": 9.078588087010389e-05,
      "loss": 46.0,
      "step": 776
    },
    {
      "epoch": 0.13112264270345525,
      "grad_norm": 0.003182788612321019,
      "learning_rate": 9.056971559887145e-05,
      "loss": 46.0,
      "step": 777
    },
    {
      "epoch": 0.13129139771336962,
      "grad_norm": 0.0018501750892028213,
      "learning_rate": 9.035359477922936e-05,
      "loss": 46.0,
      "step": 778
    },
    {
      "epoch": 0.13146015272328399,
      "grad_norm": 0.0009051130618900061,
      "learning_rate": 9.013751942990766e-05,
      "loss": 46.0,
      "step": 779
    },
    {
      "epoch": 0.13162890773319832,
      "grad_norm": 0.0003044608747586608,
      "learning_rate": 8.9921490569422e-05,
      "loss": 46.0,
      "step": 780
    },
    {
      "epoch": 0.1317976627431127,
      "grad_norm": 0.0008892896585166454,
      "learning_rate": 8.970550921606903e-05,
      "loss": 46.0,
      "step": 781
    },
    {
      "epoch": 0.13196641775302703,
      "grad_norm": 0.002685688668861985,
      "learning_rate": 8.948957638792144e-05,
      "loss": 46.0,
      "step": 782
    },
    {
      "epoch": 0.1321351727629414,
      "grad_norm": 0.0016494488809257746,
      "learning_rate": 8.927369310282302e-05,
      "loss": 46.0,
      "step": 783
    },
    {
      "epoch": 0.13230392777285577,
      "grad_norm": 0.0006050110096111894,
      "learning_rate": 8.905786037838426e-05,
      "loss": 46.0,
      "step": 784
    },
    {
      "epoch": 0.1324726827827701,
      "grad_norm": 0.0009582451893948019,
      "learning_rate": 8.884207923197715e-05,
      "loss": 46.0,
      "step": 785
    },
    {
      "epoch": 0.13264143779268447,
      "grad_norm": 0.004504833836108446,
      "learning_rate": 8.86263506807307e-05,
      "loss": 46.0,
      "step": 786
    },
    {
      "epoch": 0.13281019280259881,
      "grad_norm": 0.0005395681946538389,
      "learning_rate": 8.841067574152589e-05,
      "loss": 46.0,
      "step": 787
    },
    {
      "epoch": 0.13297894781251318,
      "grad_norm": 0.001102139474824071,
      "learning_rate": 8.819505543099099e-05,
      "loss": 46.0,
      "step": 788
    },
    {
      "epoch": 0.13314770282242755,
      "grad_norm": 0.000723580364137888,
      "learning_rate": 8.797949076549685e-05,
      "loss": 46.0,
      "step": 789
    },
    {
      "epoch": 0.1333164578323419,
      "grad_norm": 0.002134741051122546,
      "learning_rate": 8.776398276115198e-05,
      "loss": 46.0,
      "step": 790
    },
    {
      "epoch": 0.13348521284225626,
      "grad_norm": 0.0018205747473984957,
      "learning_rate": 8.75485324337977e-05,
      "loss": 46.0,
      "step": 791
    },
    {
      "epoch": 0.13365396785217062,
      "grad_norm": 0.0017829221906140447,
      "learning_rate": 8.733314079900372e-05,
      "loss": 46.0,
      "step": 792
    },
    {
      "epoch": 0.13382272286208496,
      "grad_norm": 0.000708136591129005,
      "learning_rate": 8.71178088720628e-05,
      "loss": 46.0,
      "step": 793
    },
    {
      "epoch": 0.13399147787199933,
      "grad_norm": 0.0009631026186980307,
      "learning_rate": 8.690253766798644e-05,
      "loss": 46.0,
      "step": 794
    },
    {
      "epoch": 0.13416023288191367,
      "grad_norm": 0.0010598397348076105,
      "learning_rate": 8.668732820149989e-05,
      "loss": 46.0,
      "step": 795
    },
    {
      "epoch": 0.13432898789182804,
      "grad_norm": 0.0007573203183710575,
      "learning_rate": 8.647218148703727e-05,
      "loss": 46.0,
      "step": 796
    },
    {
      "epoch": 0.1344977429017424,
      "grad_norm": 0.0017247875221073627,
      "learning_rate": 8.625709853873705e-05,
      "loss": 46.0,
      "step": 797
    },
    {
      "epoch": 0.13466649791165675,
      "grad_norm": 0.000708206498529762,
      "learning_rate": 8.6042080370437e-05,
      "loss": 46.0,
      "step": 798
    },
    {
      "epoch": 0.1348352529215711,
      "grad_norm": 0.0007804720080457628,
      "learning_rate": 8.582712799566969e-05,
      "loss": 46.0,
      "step": 799
    },
    {
      "epoch": 0.13500400793148545,
      "grad_norm": 0.002211306942626834,
      "learning_rate": 8.561224242765744e-05,
      "loss": 46.0,
      "step": 800
    },
    {
      "epoch": 0.13517276294139982,
      "grad_norm": 0.0037598570343106985,
      "learning_rate": 8.539742467930761e-05,
      "loss": 46.0,
      "step": 801
    },
    {
      "epoch": 0.1353415179513142,
      "grad_norm": 0.0007354649715125561,
      "learning_rate": 8.518267576320806e-05,
      "loss": 46.0,
      "step": 802
    },
    {
      "epoch": 0.13551027296122853,
      "grad_norm": 0.00044775562128052115,
      "learning_rate": 8.496799669162206e-05,
      "loss": 46.0,
      "step": 803
    },
    {
      "epoch": 0.1356790279711429,
      "grad_norm": 0.0005587537889368832,
      "learning_rate": 8.475338847648361e-05,
      "loss": 46.0,
      "step": 804
    },
    {
      "epoch": 0.13584778298105726,
      "grad_norm": 0.0005210166564211249,
      "learning_rate": 8.453885212939294e-05,
      "loss": 46.0,
      "step": 805
    },
    {
      "epoch": 0.1360165379909716,
      "grad_norm": 0.0007426153169944882,
      "learning_rate": 8.432438866161124e-05,
      "loss": 46.0,
      "step": 806
    },
    {
      "epoch": 0.13618529300088597,
      "grad_norm": 0.0013306104810908437,
      "learning_rate": 8.410999908405635e-05,
      "loss": 46.0,
      "step": 807
    },
    {
      "epoch": 0.1363540480108003,
      "grad_norm": 0.0013473851140588522,
      "learning_rate": 8.389568440729776e-05,
      "loss": 46.0,
      "step": 808
    },
    {
      "epoch": 0.13652280302071468,
      "grad_norm": 0.00044588756281882524,
      "learning_rate": 8.368144564155185e-05,
      "loss": 46.0,
      "step": 809
    },
    {
      "epoch": 0.13669155803062905,
      "grad_norm": 0.0006963923806324601,
      "learning_rate": 8.346728379667727e-05,
      "loss": 46.0,
      "step": 810
    },
    {
      "epoch": 0.13686031304054339,
      "grad_norm": 0.001505714375525713,
      "learning_rate": 8.325319988217004e-05,
      "loss": 46.0,
      "step": 811
    },
    {
      "epoch": 0.13702906805045775,
      "grad_norm": 0.0009234807803295553,
      "learning_rate": 8.303919490715881e-05,
      "loss": 46.0,
      "step": 812
    },
    {
      "epoch": 0.1371978230603721,
      "grad_norm": 0.001101883128285408,
      "learning_rate": 8.282526988040022e-05,
      "loss": 46.0,
      "step": 813
    },
    {
      "epoch": 0.13736657807028646,
      "grad_norm": 0.00047088676365092397,
      "learning_rate": 8.261142581027392e-05,
      "loss": 46.0,
      "step": 814
    },
    {
      "epoch": 0.13753533308020083,
      "grad_norm": 0.0010181186953559518,
      "learning_rate": 8.239766370477811e-05,
      "loss": 46.0,
      "step": 815
    },
    {
      "epoch": 0.13770408809011517,
      "grad_norm": 0.0008076895028352737,
      "learning_rate": 8.218398457152454e-05,
      "loss": 46.0,
      "step": 816
    },
    {
      "epoch": 0.13787284310002954,
      "grad_norm": 0.000841393368318677,
      "learning_rate": 8.197038941773381e-05,
      "loss": 46.0,
      "step": 817
    },
    {
      "epoch": 0.1380415981099439,
      "grad_norm": 0.0008007394499145448,
      "learning_rate": 8.175687925023082e-05,
      "loss": 46.0,
      "step": 818
    },
    {
      "epoch": 0.13821035311985824,
      "grad_norm": 0.000764928525313735,
      "learning_rate": 8.154345507543972e-05,
      "loss": 46.0,
      "step": 819
    },
    {
      "epoch": 0.1383791081297726,
      "grad_norm": 0.0015394032234326005,
      "learning_rate": 8.133011789937937e-05,
      "loss": 46.0,
      "step": 820
    },
    {
      "epoch": 0.13854786313968695,
      "grad_norm": 0.0008911428158171475,
      "learning_rate": 8.111686872765859e-05,
      "loss": 46.0,
      "step": 821
    },
    {
      "epoch": 0.13871661814960132,
      "grad_norm": 0.0006434863316826522,
      "learning_rate": 8.090370856547127e-05,
      "loss": 46.0,
      "step": 822
    },
    {
      "epoch": 0.13888537315951568,
      "grad_norm": 0.0009041949524544179,
      "learning_rate": 8.069063841759182e-05,
      "loss": 46.0,
      "step": 823
    },
    {
      "epoch": 0.13905412816943002,
      "grad_norm": 0.0003908716607838869,
      "learning_rate": 8.047765928837034e-05,
      "loss": 46.0,
      "step": 824
    },
    {
      "epoch": 0.1392228831793444,
      "grad_norm": 0.0004880438500549644,
      "learning_rate": 8.026477218172785e-05,
      "loss": 46.0,
      "step": 825
    },
    {
      "epoch": 0.13939163818925873,
      "grad_norm": 0.0008036144427023828,
      "learning_rate": 8.005197810115168e-05,
      "loss": 46.0,
      "step": 826
    },
    {
      "epoch": 0.1395603931991731,
      "grad_norm": 0.00038311423850245774,
      "learning_rate": 7.983927804969054e-05,
      "loss": 46.0,
      "step": 827
    },
    {
      "epoch": 0.13972914820908747,
      "grad_norm": 0.0007264793966896832,
      "learning_rate": 7.962667302995004e-05,
      "loss": 46.0,
      "step": 828
    },
    {
      "epoch": 0.1398979032190018,
      "grad_norm": 0.006564402021467686,
      "learning_rate": 7.941416404408778e-05,
      "loss": 46.0,
      "step": 829
    },
    {
      "epoch": 0.14006665822891617,
      "grad_norm": 0.0009483057074248791,
      "learning_rate": 7.920175209380865e-05,
      "loss": 46.0,
      "step": 830
    },
    {
      "epoch": 0.14023541323883051,
      "grad_norm": 0.001220498699694872,
      "learning_rate": 7.898943818036024e-05,
      "loss": 46.0,
      "step": 831
    },
    {
      "epoch": 0.14040416824874488,
      "grad_norm": 0.0012295391643419862,
      "learning_rate": 7.877722330452795e-05,
      "loss": 46.0,
      "step": 832
    },
    {
      "epoch": 0.14057292325865925,
      "grad_norm": 0.0008359167259186506,
      "learning_rate": 7.856510846663031e-05,
      "loss": 46.0,
      "step": 833
    },
    {
      "epoch": 0.1407416782685736,
      "grad_norm": 0.001273457077331841,
      "learning_rate": 7.835309466651444e-05,
      "loss": 46.0,
      "step": 834
    },
    {
      "epoch": 0.14091043327848796,
      "grad_norm": 0.0027280368376523256,
      "learning_rate": 7.814118290355102e-05,
      "loss": 46.0,
      "step": 835
    },
    {
      "epoch": 0.14107918828840232,
      "grad_norm": 0.0015108464285731316,
      "learning_rate": 7.792937417662993e-05,
      "loss": 46.0,
      "step": 836
    },
    {
      "epoch": 0.14124794329831666,
      "grad_norm": 0.002349273534491658,
      "learning_rate": 7.771766948415525e-05,
      "loss": 46.0,
      "step": 837
    },
    {
      "epoch": 0.14141669830823103,
      "grad_norm": 0.0012695115292444825,
      "learning_rate": 7.750606982404065e-05,
      "loss": 46.0,
      "step": 838
    },
    {
      "epoch": 0.14158545331814537,
      "grad_norm": 0.0009470002260059118,
      "learning_rate": 7.729457619370489e-05,
      "loss": 46.0,
      "step": 839
    },
    {
      "epoch": 0.14175420832805974,
      "grad_norm": 0.0006858884589746594,
      "learning_rate": 7.708318959006669e-05,
      "loss": 46.0,
      "step": 840
    },
    {
      "epoch": 0.1419229633379741,
      "grad_norm": 0.0016968476120382547,
      "learning_rate": 7.687191100954046e-05,
      "loss": 46.0,
      "step": 841
    },
    {
      "epoch": 0.14209171834788845,
      "grad_norm": 0.002445329912006855,
      "learning_rate": 7.666074144803139e-05,
      "loss": 46.0,
      "step": 842
    },
    {
      "epoch": 0.1422604733578028,
      "grad_norm": 0.0007218089303933084,
      "learning_rate": 7.644968190093067e-05,
      "loss": 46.0,
      "step": 843
    },
    {
      "epoch": 0.14242922836771715,
      "grad_norm": 0.0014190871734172106,
      "learning_rate": 7.623873336311108e-05,
      "loss": 46.0,
      "step": 844
    },
    {
      "epoch": 0.14259798337763152,
      "grad_norm": 0.0006293446640484035,
      "learning_rate": 7.602789682892203e-05,
      "loss": 46.0,
      "step": 845
    },
    {
      "epoch": 0.1427667383875459,
      "grad_norm": 0.0012009006459265947,
      "learning_rate": 7.581717329218499e-05,
      "loss": 46.0,
      "step": 846
    },
    {
      "epoch": 0.14293549339746023,
      "grad_norm": 0.0011125734308734536,
      "learning_rate": 7.560656374618886e-05,
      "loss": 46.0,
      "step": 847
    },
    {
      "epoch": 0.1431042484073746,
      "grad_norm": 0.002301817527040839,
      "learning_rate": 7.539606918368507e-05,
      "loss": 46.0,
      "step": 848
    },
    {
      "epoch": 0.14327300341728896,
      "grad_norm": 0.0006231066072359681,
      "learning_rate": 7.518569059688325e-05,
      "loss": 46.0,
      "step": 849
    },
    {
      "epoch": 0.1434417584272033,
      "grad_norm": 0.002790312049910426,
      "learning_rate": 7.497542897744622e-05,
      "loss": 46.0,
      "step": 850
    },
    {
      "epoch": 0.14361051343711767,
      "grad_norm": 0.0007836490985937417,
      "learning_rate": 7.476528531648544e-05,
      "loss": 46.0,
      "step": 851
    },
    {
      "epoch": 0.143779268447032,
      "grad_norm": 0.0008998861303552985,
      "learning_rate": 7.455526060455648e-05,
      "loss": 46.0,
      "step": 852
    },
    {
      "epoch": 0.14394802345694638,
      "grad_norm": 0.0022323448210954666,
      "learning_rate": 7.434535583165408e-05,
      "loss": 46.0,
      "step": 853
    },
    {
      "epoch": 0.14411677846686075,
      "grad_norm": 0.0007675597444176674,
      "learning_rate": 7.413557198720765e-05,
      "loss": 46.0,
      "step": 854
    },
    {
      "epoch": 0.14428553347677509,
      "grad_norm": 0.0015968787483870983,
      "learning_rate": 7.392591006007666e-05,
      "loss": 46.0,
      "step": 855
    },
    {
      "epoch": 0.14445428848668945,
      "grad_norm": 0.0005020895623601973,
      "learning_rate": 7.37163710385458e-05,
      "loss": 46.0,
      "step": 856
    },
    {
      "epoch": 0.1446230434966038,
      "grad_norm": 0.0010543781099840999,
      "learning_rate": 7.350695591032049e-05,
      "loss": 46.0,
      "step": 857
    },
    {
      "epoch": 0.14479179850651816,
      "grad_norm": 0.000769981590565294,
      "learning_rate": 7.329766566252212e-05,
      "loss": 46.0,
      "step": 858
    },
    {
      "epoch": 0.14496055351643253,
      "grad_norm": 0.0009824162116274238,
      "learning_rate": 7.30885012816834e-05,
      "loss": 46.0,
      "step": 859
    },
    {
      "epoch": 0.14512930852634687,
      "grad_norm": 0.0020662923343479633,
      "learning_rate": 7.287946375374385e-05,
      "loss": 46.0,
      "step": 860
    },
    {
      "epoch": 0.14529806353626123,
      "grad_norm": 0.002364357467740774,
      "learning_rate": 7.26705540640449e-05,
      "loss": 46.0,
      "step": 861
    },
    {
      "epoch": 0.1454668185461756,
      "grad_norm": 0.001292189583182335,
      "learning_rate": 7.246177319732543e-05,
      "loss": 46.0,
      "step": 862
    },
    {
      "epoch": 0.14563557355608994,
      "grad_norm": 0.000383058562874794,
      "learning_rate": 7.225312213771722e-05,
      "loss": 46.0,
      "step": 863
    },
    {
      "epoch": 0.1458043285660043,
      "grad_norm": 0.0009881592122837901,
      "learning_rate": 7.204460186873995e-05,
      "loss": 46.0,
      "step": 864
    },
    {
      "epoch": 0.14597308357591865,
      "grad_norm": 0.0010527949780225754,
      "learning_rate": 7.183621337329703e-05,
      "loss": 46.0,
      "step": 865
    },
    {
      "epoch": 0.14614183858583302,
      "grad_norm": 0.001268843188881874,
      "learning_rate": 7.162795763367049e-05,
      "loss": 46.0,
      "step": 866
    },
    {
      "epoch": 0.14631059359574738,
      "grad_norm": 0.003271568100899458,
      "learning_rate": 7.141983563151677e-05,
      "loss": 46.0,
      "step": 867
    },
    {
      "epoch": 0.14647934860566172,
      "grad_norm": 0.0008496344089508057,
      "learning_rate": 7.121184834786188e-05,
      "loss": 46.0,
      "step": 868
    },
    {
      "epoch": 0.1466481036155761,
      "grad_norm": 0.001987769966945052,
      "learning_rate": 7.10039967630967e-05,
      "loss": 46.0,
      "step": 869
    },
    {
      "epoch": 0.14681685862549043,
      "grad_norm": 0.0011683752527460456,
      "learning_rate": 7.079628185697258e-05,
      "loss": 46.0,
      "step": 870
    },
    {
      "epoch": 0.1469856136354048,
      "grad_norm": 0.000853047997225076,
      "learning_rate": 7.058870460859656e-05,
      "loss": 46.0,
      "step": 871
    },
    {
      "epoch": 0.14715436864531917,
      "grad_norm": 0.001560655073262751,
      "learning_rate": 7.03812659964268e-05,
      "loss": 46.0,
      "step": 872
    },
    {
      "epoch": 0.1473231236552335,
      "grad_norm": 0.0007004750659689307,
      "learning_rate": 7.017396699826803e-05,
      "loss": 46.0,
      "step": 873
    },
    {
      "epoch": 0.14749187866514787,
      "grad_norm": 0.0006875868421047926,
      "learning_rate": 6.996680859126677e-05,
      "loss": 46.0,
      "step": 874
    },
    {
      "epoch": 0.14766063367506224,
      "grad_norm": 0.0009068532381206751,
      "learning_rate": 6.975979175190688e-05,
      "loss": 46.0,
      "step": 875
    },
    {
      "epoch": 0.14782938868497658,
      "grad_norm": 0.0005159789579920471,
      "learning_rate": 6.955291745600498e-05,
      "loss": 46.0,
      "step": 876
    },
    {
      "epoch": 0.14799814369489095,
      "grad_norm": 0.0014988022157922387,
      "learning_rate": 6.934618667870567e-05,
      "loss": 46.0,
      "step": 877
    },
    {
      "epoch": 0.1481668987048053,
      "grad_norm": 0.0007760879816487432,
      "learning_rate": 6.913960039447718e-05,
      "loss": 46.0,
      "step": 878
    },
    {
      "epoch": 0.14833565371471966,
      "grad_norm": 0.0010894398437812924,
      "learning_rate": 6.893315957710649e-05,
      "loss": 46.0,
      "step": 879
    },
    {
      "epoch": 0.14850440872463402,
      "grad_norm": 0.0004840297333430499,
      "learning_rate": 6.8726865199695e-05,
      "loss": 46.0,
      "step": 880
    },
    {
      "epoch": 0.14867316373454836,
      "grad_norm": 0.0005790110444650054,
      "learning_rate": 6.852071823465383e-05,
      "loss": 46.0,
      "step": 881
    },
    {
      "epoch": 0.14884191874446273,
      "grad_norm": 0.002223587827757001,
      "learning_rate": 6.831471965369914e-05,
      "loss": 46.0,
      "step": 882
    },
    {
      "epoch": 0.14901067375437707,
      "grad_norm": 0.002203379524871707,
      "learning_rate": 6.810887042784777e-05,
      "loss": 46.0,
      "step": 883
    },
    {
      "epoch": 0.14917942876429144,
      "grad_norm": 0.002671119524165988,
      "learning_rate": 6.790317152741249e-05,
      "loss": 46.0,
      "step": 884
    },
    {
      "epoch": 0.1493481837742058,
      "grad_norm": 0.001122485613450408,
      "learning_rate": 6.769762392199748e-05,
      "loss": 46.0,
      "step": 885
    },
    {
      "epoch": 0.14951693878412015,
      "grad_norm": 0.0007207071175798774,
      "learning_rate": 6.749222858049382e-05,
      "loss": 46.0,
      "step": 886
    },
    {
      "epoch": 0.1496856937940345,
      "grad_norm": 0.0007145823910832405,
      "learning_rate": 6.728698647107475e-05,
      "loss": 46.0,
      "step": 887
    },
    {
      "epoch": 0.14985444880394888,
      "grad_norm": 0.0008989453199319541,
      "learning_rate": 6.708189856119128e-05,
      "loss": 46.0,
      "step": 888
    },
    {
      "epoch": 0.15002320381386322,
      "grad_norm": 0.0011213996913284063,
      "learning_rate": 6.687696581756763e-05,
      "loss": 46.0,
      "step": 889
    },
    {
      "epoch": 0.1501919588237776,
      "grad_norm": 0.0005325007368810475,
      "learning_rate": 6.667218920619649e-05,
      "loss": 46.0,
      "step": 890
    },
    {
      "epoch": 0.15036071383369193,
      "grad_norm": 0.0008855744963511825,
      "learning_rate": 6.646756969233463e-05,
      "loss": 46.0,
      "step": 891
    },
    {
      "epoch": 0.1505294688436063,
      "grad_norm": 0.0007719965651631355,
      "learning_rate": 6.626310824049838e-05,
      "loss": 46.0,
      "step": 892
    },
    {
      "epoch": 0.15069822385352066,
      "grad_norm": 0.001363624120131135,
      "learning_rate": 6.605880581445894e-05,
      "loss": 46.0,
      "step": 893
    },
    {
      "epoch": 0.150866978863435,
      "grad_norm": 0.0006050238152965903,
      "learning_rate": 6.585466337723796e-05,
      "loss": 46.0,
      "step": 894
    },
    {
      "epoch": 0.15103573387334937,
      "grad_norm": 0.00020488305017352104,
      "learning_rate": 6.565068189110286e-05,
      "loss": 46.0,
      "step": 895
    },
    {
      "epoch": 0.1512044888832637,
      "grad_norm": 0.000528375618159771,
      "learning_rate": 6.544686231756246e-05,
      "loss": 46.0,
      "step": 896
    },
    {
      "epoch": 0.15137324389317808,
      "grad_norm": 0.0006251371232792735,
      "learning_rate": 6.52432056173624e-05,
      "loss": 46.0,
      "step": 897
    },
    {
      "epoch": 0.15154199890309245,
      "grad_norm": 0.000713423069100827,
      "learning_rate": 6.503971275048042e-05,
      "loss": 46.0,
      "step": 898
    },
    {
      "epoch": 0.15171075391300679,
      "grad_norm": 0.0008193039102479815,
      "learning_rate": 6.483638467612227e-05,
      "loss": 46.0,
      "step": 899
    },
    {
      "epoch": 0.15187950892292115,
      "grad_norm": 0.000522931688465178,
      "learning_rate": 6.463322235271666e-05,
      "loss": 46.0,
      "step": 900
    },
    {
      "epoch": 0.1520482639328355,
      "grad_norm": 0.001072798273526132,
      "learning_rate": 6.44302267379111e-05,
      "loss": 46.0,
      "step": 901
    },
    {
      "epoch": 0.15221701894274986,
      "grad_norm": 0.0006312826299108565,
      "learning_rate": 6.422739878856735e-05,
      "loss": 46.0,
      "step": 902
    },
    {
      "epoch": 0.15238577395266423,
      "grad_norm": 0.002877163467928767,
      "learning_rate": 6.402473946075671e-05,
      "loss": 46.0,
      "step": 903
    },
    {
      "epoch": 0.15255452896257857,
      "grad_norm": 0.0008996203541755676,
      "learning_rate": 6.382224970975572e-05,
      "loss": 46.0,
      "step": 904
    },
    {
      "epoch": 0.15272328397249293,
      "grad_norm": 0.0006124867359176278,
      "learning_rate": 6.361993049004163e-05,
      "loss": 46.0,
      "step": 905
    },
    {
      "epoch": 0.1528920389824073,
      "grad_norm": 0.001687378971837461,
      "learning_rate": 6.341778275528773e-05,
      "loss": 46.0,
      "step": 906
    },
    {
      "epoch": 0.15306079399232164,
      "grad_norm": 0.001008613035082817,
      "learning_rate": 6.321580745835915e-05,
      "loss": 46.0,
      "step": 907
    },
    {
      "epoch": 0.153229549002236,
      "grad_norm": 0.0006372160278260708,
      "learning_rate": 6.301400555130805e-05,
      "loss": 46.0,
      "step": 908
    },
    {
      "epoch": 0.15339830401215035,
      "grad_norm": 0.0005495776422321796,
      "learning_rate": 6.281237798536932e-05,
      "loss": 46.0,
      "step": 909
    },
    {
      "epoch": 0.15356705902206472,
      "grad_norm": 0.0008954824879765511,
      "learning_rate": 6.261092571095614e-05,
      "loss": 46.0,
      "step": 910
    },
    {
      "epoch": 0.15373581403197908,
      "grad_norm": 0.001377457519993186,
      "learning_rate": 6.240964967765528e-05,
      "loss": 46.0,
      "step": 911
    },
    {
      "epoch": 0.15390456904189342,
      "grad_norm": 0.0011930032633244991,
      "learning_rate": 6.220855083422285e-05,
      "loss": 46.0,
      "step": 912
    },
    {
      "epoch": 0.1540733240518078,
      "grad_norm": 0.0013531373115256429,
      "learning_rate": 6.200763012857973e-05,
      "loss": 46.0,
      "step": 913
    },
    {
      "epoch": 0.15424207906172213,
      "grad_norm": 0.0009069031220860779,
      "learning_rate": 6.180688850780711e-05,
      "loss": 46.0,
      "step": 914
    },
    {
      "epoch": 0.1544108340716365,
      "grad_norm": 0.0008781193173490465,
      "learning_rate": 6.160632691814203e-05,
      "loss": 46.0,
      "step": 915
    },
    {
      "epoch": 0.15457958908155087,
      "grad_norm": 0.0026150341145694256,
      "learning_rate": 6.140594630497287e-05,
      "loss": 46.0,
      "step": 916
    },
    {
      "epoch": 0.1547483440914652,
      "grad_norm": 0.0013742209412157536,
      "learning_rate": 6.120574761283497e-05,
      "loss": 46.0,
      "step": 917
    },
    {
      "epoch": 0.15491709910137957,
      "grad_norm": 0.0027837788220494986,
      "learning_rate": 6.100573178540619e-05,
      "loss": 46.0,
      "step": 918
    },
    {
      "epoch": 0.15508585411129394,
      "grad_norm": 0.000727921084035188,
      "learning_rate": 6.080589976550233e-05,
      "loss": 46.0,
      "step": 919
    },
    {
      "epoch": 0.15525460912120828,
      "grad_norm": 0.0010688056936487556,
      "learning_rate": 6.06062524950729e-05,
      "loss": 46.0,
      "step": 920
    },
    {
      "epoch": 0.15542336413112265,
      "grad_norm": 0.0006157424068078399,
      "learning_rate": 6.040679091519643e-05,
      "loss": 46.0,
      "step": 921
    },
    {
      "epoch": 0.155592119141037,
      "grad_norm": 0.0018731793388724327,
      "learning_rate": 6.020751596607621e-05,
      "loss": 46.0,
      "step": 922
    },
    {
      "epoch": 0.15576087415095136,
      "grad_norm": 0.0011395288165658712,
      "learning_rate": 6.000842858703585e-05,
      "loss": 46.0,
      "step": 923
    },
    {
      "epoch": 0.15592962916086572,
      "grad_norm": 0.0004488412232603878,
      "learning_rate": 5.980952971651472e-05,
      "loss": 46.0,
      "step": 924
    },
    {
      "epoch": 0.15609838417078006,
      "grad_norm": 0.0012818429386243224,
      "learning_rate": 5.9610820292063665e-05,
      "loss": 46.0,
      "step": 925
    },
    {
      "epoch": 0.15626713918069443,
      "grad_norm": 0.0011927819577977061,
      "learning_rate": 5.9412301250340584e-05,
      "loss": 46.0,
      "step": 926
    },
    {
      "epoch": 0.15643589419060877,
      "grad_norm": 0.0010172206675633788,
      "learning_rate": 5.921397352710587e-05,
      "loss": 46.0,
      "step": 927
    },
    {
      "epoch": 0.15660464920052314,
      "grad_norm": 0.001916920649819076,
      "learning_rate": 5.9015838057218196e-05,
      "loss": 46.0,
      "step": 928
    },
    {
      "epoch": 0.1567734042104375,
      "grad_norm": 0.00064216268947348,
      "learning_rate": 5.881789577462993e-05,
      "loss": 46.0,
      "step": 929
    },
    {
      "epoch": 0.15694215922035185,
      "grad_norm": 0.0006899251602590084,
      "learning_rate": 5.862014761238281e-05,
      "loss": 46.0,
      "step": 930
    },
    {
      "epoch": 0.1571109142302662,
      "grad_norm": 0.0008296074229292572,
      "learning_rate": 5.842259450260366e-05,
      "loss": 46.0,
      "step": 931
    },
    {
      "epoch": 0.15727966924018058,
      "grad_norm": 0.0014352595899254084,
      "learning_rate": 5.822523737649974e-05,
      "loss": 46.0,
      "step": 932
    },
    {
      "epoch": 0.15744842425009492,
      "grad_norm": 0.0011921770637854934,
      "learning_rate": 5.80280771643546e-05,
      "loss": 46.0,
      "step": 933
    },
    {
      "epoch": 0.1576171792600093,
      "grad_norm": 0.0011576671386137605,
      "learning_rate": 5.7831114795523547e-05,
      "loss": 46.0,
      "step": 934
    },
    {
      "epoch": 0.15778593426992363,
      "grad_norm": 0.000808388867881149,
      "learning_rate": 5.7634351198429304e-05,
      "loss": 46.0,
      "step": 935
    },
    {
      "epoch": 0.157954689279838,
      "grad_norm": 0.0016751131042838097,
      "learning_rate": 5.74377873005577e-05,
      "loss": 46.0,
      "step": 936
    },
    {
      "epoch": 0.15812344428975236,
      "grad_norm": 0.0008697423618286848,
      "learning_rate": 5.724142402845318e-05,
      "loss": 46.0,
      "step": 937
    },
    {
      "epoch": 0.1582921992996667,
      "grad_norm": 0.0005162409506738186,
      "learning_rate": 5.7045262307714497e-05,
      "loss": 46.0,
      "step": 938
    },
    {
      "epoch": 0.15846095430958107,
      "grad_norm": 0.0005604674806818366,
      "learning_rate": 5.68493030629905e-05,
      "loss": 46.0,
      "step": 939
    },
    {
      "epoch": 0.1586297093194954,
      "grad_norm": 0.0007223184802569449,
      "learning_rate": 5.6653547217975354e-05,
      "loss": 46.0,
      "step": 940
    },
    {
      "epoch": 0.15879846432940978,
      "grad_norm": 0.0010802462929859757,
      "learning_rate": 5.645799569540463e-05,
      "loss": 46.0,
      "step": 941
    },
    {
      "epoch": 0.15896721933932415,
      "grad_norm": 0.0014821782242506742,
      "learning_rate": 5.626264941705086e-05,
      "loss": 46.0,
      "step": 942
    },
    {
      "epoch": 0.15913597434923848,
      "grad_norm": 0.000693553127348423,
      "learning_rate": 5.606750930371888e-05,
      "loss": 46.0,
      "step": 943
    },
    {
      "epoch": 0.15930472935915285,
      "grad_norm": 0.000700769480317831,
      "learning_rate": 5.587257627524195e-05,
      "loss": 46.0,
      "step": 944
    },
    {
      "epoch": 0.15947348436906722,
      "grad_norm": 0.0012417947873473167,
      "learning_rate": 5.567785125047708e-05,
      "loss": 46.0,
      "step": 945
    },
    {
      "epoch": 0.15964223937898156,
      "grad_norm": 0.0007827091030776501,
      "learning_rate": 5.548333514730082e-05,
      "loss": 46.0,
      "step": 946
    },
    {
      "epoch": 0.15981099438889593,
      "grad_norm": 0.0004621573316399008,
      "learning_rate": 5.528902888260493e-05,
      "loss": 46.0,
      "step": 947
    },
    {
      "epoch": 0.15997974939881027,
      "grad_norm": 0.0009308409062214196,
      "learning_rate": 5.509493337229208e-05,
      "loss": 46.0,
      "step": 948
    },
    {
      "epoch": 0.16014850440872463,
      "grad_norm": 0.000546308874618262,
      "learning_rate": 5.4901049531271474e-05,
      "loss": 46.0,
      "step": 949
    },
    {
      "epoch": 0.160317259418639,
      "grad_norm": 0.002490605926141143,
      "learning_rate": 5.470737827345458e-05,
      "loss": 46.0,
      "step": 950
    },
    {
      "epoch": 0.16048601442855334,
      "grad_norm": 0.0007810198585502803,
      "learning_rate": 5.451392051175079e-05,
      "loss": 46.0,
      "step": 951
    },
    {
      "epoch": 0.1606547694384677,
      "grad_norm": 0.0004202220879960805,
      "learning_rate": 5.4320677158063246e-05,
      "loss": 46.0,
      "step": 952
    },
    {
      "epoch": 0.16082352444838205,
      "grad_norm": 0.0005529209738597274,
      "learning_rate": 5.4127649123284264e-05,
      "loss": 46.0,
      "step": 953
    },
    {
      "epoch": 0.16099227945829642,
      "grad_norm": 0.000546621042303741,
      "learning_rate": 5.3934837317291276e-05,
      "loss": 46.0,
      "step": 954
    },
    {
      "epoch": 0.16116103446821078,
      "grad_norm": 0.0007445806404575706,
      "learning_rate": 5.374224264894261e-05,
      "loss": 46.0,
      "step": 955
    },
    {
      "epoch": 0.16132978947812512,
      "grad_norm": 0.0006346721784211695,
      "learning_rate": 5.354986602607279e-05,
      "loss": 46.0,
      "step": 956
    },
    {
      "epoch": 0.1614985444880395,
      "grad_norm": 0.0010019588517025113,
      "learning_rate": 5.335770835548883e-05,
      "loss": 46.0,
      "step": 957
    },
    {
      "epoch": 0.16166729949795386,
      "grad_norm": 0.001932504354044795,
      "learning_rate": 5.316577054296551e-05,
      "loss": 46.0,
      "step": 958
    },
    {
      "epoch": 0.1618360545078682,
      "grad_norm": 0.0013626782456412911,
      "learning_rate": 5.2974053493241274e-05,
      "loss": 46.0,
      "step": 959
    },
    {
      "epoch": 0.16200480951778257,
      "grad_norm": 0.0013853182317689061,
      "learning_rate": 5.278255811001398e-05,
      "loss": 46.0,
      "step": 960
    },
    {
      "epoch": 0.1621735645276969,
      "grad_norm": 0.00069183245068416,
      "learning_rate": 5.259128529593661e-05,
      "loss": 46.0,
      "step": 961
    },
    {
      "epoch": 0.16234231953761127,
      "grad_norm": 0.00028503654175437987,
      "learning_rate": 5.240023595261301e-05,
      "loss": 46.0,
      "step": 962
    },
    {
      "epoch": 0.16251107454752564,
      "grad_norm": 0.0003797942481469363,
      "learning_rate": 5.2209410980593674e-05,
      "loss": 46.0,
      "step": 963
    },
    {
      "epoch": 0.16267982955743998,
      "grad_norm": 0.0009620683849789202,
      "learning_rate": 5.2018811279371416e-05,
      "loss": 46.0,
      "step": 964
    },
    {
      "epoch": 0.16284858456735435,
      "grad_norm": 0.0010808344231918454,
      "learning_rate": 5.1828437747377354e-05,
      "loss": 46.0,
      "step": 965
    },
    {
      "epoch": 0.1630173395772687,
      "grad_norm": 0.0012897250708192587,
      "learning_rate": 5.163829128197626e-05,
      "loss": 46.0,
      "step": 966
    },
    {
      "epoch": 0.16318609458718306,
      "grad_norm": 0.0009198890766128898,
      "learning_rate": 5.144837277946273e-05,
      "loss": 46.0,
      "step": 967
    },
    {
      "epoch": 0.16335484959709742,
      "grad_norm": 0.0015073876129463315,
      "learning_rate": 5.125868313505691e-05,
      "loss": 46.0,
      "step": 968
    },
    {
      "epoch": 0.16352360460701176,
      "grad_norm": 0.000782114511821419,
      "learning_rate": 5.1069223242899876e-05,
      "loss": 46.0,
      "step": 969
    },
    {
      "epoch": 0.16369235961692613,
      "grad_norm": 0.0009239850332960486,
      "learning_rate": 5.087999399605006e-05,
      "loss": 46.0,
      "step": 970
    },
    {
      "epoch": 0.16386111462684047,
      "grad_norm": 0.0014594901585951447,
      "learning_rate": 5.0690996286478464e-05,
      "loss": 46.0,
      "step": 971
    },
    {
      "epoch": 0.16402986963675484,
      "grad_norm": 0.0007826307555660605,
      "learning_rate": 5.050223100506479e-05,
      "loss": 46.0,
      "step": 972
    },
    {
      "epoch": 0.1641986246466692,
      "grad_norm": 0.0008899096283130348,
      "learning_rate": 5.031369904159311e-05,
      "loss": 46.0,
      "step": 973
    },
    {
      "epoch": 0.16436737965658355,
      "grad_norm": 0.0009231562726199627,
      "learning_rate": 5.012540128474773e-05,
      "loss": 46.0,
      "step": 974
    },
    {
      "epoch": 0.1645361346664979,
      "grad_norm": 0.0013324058381840587,
      "learning_rate": 4.993733862210894e-05,
      "loss": 46.0,
      "step": 975
    },
    {
      "epoch": 0.16470488967641228,
      "grad_norm": 0.0003165770904161036,
      "learning_rate": 4.97495119401489e-05,
      "loss": 46.0,
      "step": 976
    },
    {
      "epoch": 0.16487364468632662,
      "grad_norm": 0.0030829953029751778,
      "learning_rate": 4.956192212422737e-05,
      "loss": 46.0,
      "step": 977
    },
    {
      "epoch": 0.165042399696241,
      "grad_norm": 0.0007854963187128305,
      "learning_rate": 4.9374570058587735e-05,
      "loss": 46.0,
      "step": 978
    },
    {
      "epoch": 0.16521115470615533,
      "grad_norm": 0.0004671328642871231,
      "learning_rate": 4.918745662635249e-05,
      "loss": 46.0,
      "step": 979
    },
    {
      "epoch": 0.1653799097160697,
      "grad_norm": 0.0011242057662457228,
      "learning_rate": 4.900058270951938e-05,
      "loss": 46.0,
      "step": 980
    },
    {
      "epoch": 0.16554866472598406,
      "grad_norm": 0.0012494113761931658,
      "learning_rate": 4.881394918895727e-05,
      "loss": 46.0,
      "step": 981
    },
    {
      "epoch": 0.1657174197358984,
      "grad_norm": 0.0012623087968677282,
      "learning_rate": 4.862755694440164e-05,
      "loss": 46.0,
      "step": 982
    },
    {
      "epoch": 0.16588617474581277,
      "grad_norm": 0.0015923196915537119,
      "learning_rate": 4.8441406854450764e-05,
      "loss": 46.0,
      "step": 983
    },
    {
      "epoch": 0.1660549297557271,
      "grad_norm": 0.0006417816039174795,
      "learning_rate": 4.8255499796561564e-05,
      "loss": 46.0,
      "step": 984
    },
    {
      "epoch": 0.16622368476564148,
      "grad_norm": 0.0016659084940329194,
      "learning_rate": 4.806983664704525e-05,
      "loss": 46.0,
      "step": 985
    },
    {
      "epoch": 0.16639243977555584,
      "grad_norm": 0.0007746117771603167,
      "learning_rate": 4.788441828106338e-05,
      "loss": 46.0,
      "step": 986
    },
    {
      "epoch": 0.16656119478547018,
      "grad_norm": 0.002332525560632348,
      "learning_rate": 4.769924557262364e-05,
      "loss": 46.0,
      "step": 987
    },
    {
      "epoch": 0.16672994979538455,
      "grad_norm": 0.0006849498022347689,
      "learning_rate": 4.751431939457579e-05,
      "loss": 46.0,
      "step": 988
    },
    {
      "epoch": 0.16689870480529892,
      "grad_norm": 0.0005523057770915329,
      "learning_rate": 4.732964061860752e-05,
      "loss": 46.0,
      "step": 989
    },
    {
      "epoch": 0.16706745981521326,
      "grad_norm": 0.000503736431710422,
      "learning_rate": 4.71452101152403e-05,
      "loss": 46.0,
      "step": 990
    },
    {
      "epoch": 0.16723621482512763,
      "grad_norm": 0.0019155279733240604,
      "learning_rate": 4.6961028753825364e-05,
      "loss": 46.0,
      "step": 991
    },
    {
      "epoch": 0.16740496983504197,
      "grad_norm": 0.00047645490849390626,
      "learning_rate": 4.677709740253953e-05,
      "loss": 46.0,
      "step": 992
    },
    {
      "epoch": 0.16757372484495633,
      "grad_norm": 0.00034740299452096224,
      "learning_rate": 4.659341692838113e-05,
      "loss": 46.0,
      "step": 993
    },
    {
      "epoch": 0.1677424798548707,
      "grad_norm": 0.0007441159687004983,
      "learning_rate": 4.640998819716605e-05,
      "loss": 46.0,
      "step": 994
    },
    {
      "epoch": 0.16791123486478504,
      "grad_norm": 0.0011230899253860116,
      "learning_rate": 4.622681207352335e-05,
      "loss": 46.0,
      "step": 995
    },
    {
      "epoch": 0.1680799898746994,
      "grad_norm": 0.0009723291150294244,
      "learning_rate": 4.604388942089146e-05,
      "loss": 46.0,
      "step": 996
    },
    {
      "epoch": 0.16824874488461375,
      "grad_norm": 0.0005291839479468763,
      "learning_rate": 4.586122110151414e-05,
      "loss": 46.0,
      "step": 997
    },
    {
      "epoch": 0.16841749989452812,
      "grad_norm": 0.0013463158393278718,
      "learning_rate": 4.5678807976436034e-05,
      "loss": 46.0,
      "step": 998
    },
    {
      "epoch": 0.16858625490444248,
      "grad_norm": 0.0005321825738064945,
      "learning_rate": 4.549665090549913e-05,
      "loss": 46.0,
      "step": 999
    },
    {
      "epoch": 0.16875500991435682,
      "grad_norm": 0.0006381895509548485,
      "learning_rate": 4.531475074733832e-05,
      "loss": 46.0,
      "step": 1000
    },
    {
      "epoch": 0.1689237649242712,
      "grad_norm": 0.0013920076889917254,
      "learning_rate": 4.5133108359377504e-05,
      "loss": 46.0,
      "step": 1001
    },
    {
      "epoch": 0.16909251993418556,
      "grad_norm": 0.0016773123061284423,
      "learning_rate": 4.495172459782556e-05,
      "loss": 46.0,
      "step": 1002
    },
    {
      "epoch": 0.1692612749440999,
      "grad_norm": 0.0011099010007455945,
      "learning_rate": 4.477060031767223e-05,
      "loss": 46.0,
      "step": 1003
    },
    {
      "epoch": 0.16943002995401427,
      "grad_norm": 0.0020444022957235575,
      "learning_rate": 4.4589736372684166e-05,
      "loss": 46.0,
      "step": 1004
    },
    {
      "epoch": 0.1695987849639286,
      "grad_norm": 0.0008166442858055234,
      "learning_rate": 4.440913361540087e-05,
      "loss": 46.0,
      "step": 1005
    },
    {
      "epoch": 0.16976753997384297,
      "grad_norm": 0.0013784863986074924,
      "learning_rate": 4.4228792897130654e-05,
      "loss": 46.0,
      "step": 1006
    },
    {
      "epoch": 0.16993629498375734,
      "grad_norm": 0.0005884088459424675,
      "learning_rate": 4.40487150679468e-05,
      "loss": 46.0,
      "step": 1007
    },
    {
      "epoch": 0.17010504999367168,
      "grad_norm": 0.000776112952735275,
      "learning_rate": 4.386890097668317e-05,
      "loss": 46.0,
      "step": 1008
    },
    {
      "epoch": 0.17027380500358605,
      "grad_norm": 0.0008750237757340074,
      "learning_rate": 4.3689351470930574e-05,
      "loss": 46.0,
      "step": 1009
    },
    {
      "epoch": 0.1704425600135004,
      "grad_norm": 0.000761337811127305,
      "learning_rate": 4.351006739703275e-05,
      "loss": 46.0,
      "step": 1010
    },
    {
      "epoch": 0.17061131502341476,
      "grad_norm": 0.0031786442268639803,
      "learning_rate": 4.333104960008203e-05,
      "loss": 46.0,
      "step": 1011
    },
    {
      "epoch": 0.17078007003332912,
      "grad_norm": 0.004400862380862236,
      "learning_rate": 4.3152298923915734e-05,
      "loss": 46.0,
      "step": 1012
    },
    {
      "epoch": 0.17094882504324346,
      "grad_norm": 0.0022119544446468353,
      "learning_rate": 4.2973816211112095e-05,
      "loss": 46.0,
      "step": 1013
    },
    {
      "epoch": 0.17111758005315783,
      "grad_norm": 0.000868487695697695,
      "learning_rate": 4.279560230298616e-05,
      "loss": 46.0,
      "step": 1014
    },
    {
      "epoch": 0.1712863350630722,
      "grad_norm": 0.0006466138293035328,
      "learning_rate": 4.261765803958594e-05,
      "loss": 46.0,
      "step": 1015
    },
    {
      "epoch": 0.17145509007298654,
      "grad_norm": 0.0007957179914228618,
      "learning_rate": 4.243998425968841e-05,
      "loss": 46.0,
      "step": 1016
    },
    {
      "epoch": 0.1716238450829009,
      "grad_norm": 0.00033654290018603206,
      "learning_rate": 4.2262581800795586e-05,
      "loss": 46.0,
      "step": 1017
    },
    {
      "epoch": 0.17179260009281525,
      "grad_norm": 0.003317353082820773,
      "learning_rate": 4.2085451499130524e-05,
      "loss": 46.0,
      "step": 1018
    },
    {
      "epoch": 0.1719613551027296,
      "grad_norm": 0.00047639801050536335,
      "learning_rate": 4.190859418963341e-05,
      "loss": 46.0,
      "step": 1019
    },
    {
      "epoch": 0.17213011011264398,
      "grad_norm": 0.002305058529600501,
      "learning_rate": 4.173201070595775e-05,
      "loss": 46.0,
      "step": 1020
    },
    {
      "epoch": 0.17229886512255832,
      "grad_norm": 0.0012476850533857942,
      "learning_rate": 4.1555701880466105e-05,
      "loss": 46.0,
      "step": 1021
    },
    {
      "epoch": 0.1724676201324727,
      "grad_norm": 0.004061322659254074,
      "learning_rate": 4.137966854422647e-05,
      "loss": 46.0,
      "step": 1022
    },
    {
      "epoch": 0.17263637514238703,
      "grad_norm": 0.0006147538078948855,
      "learning_rate": 4.120391152700841e-05,
      "loss": 46.0,
      "step": 1023
    },
    {
      "epoch": 0.1728051301523014,
      "grad_norm": 0.0011847203131765127,
      "learning_rate": 4.102843165727873e-05,
      "loss": 46.0,
      "step": 1024
    },
    {
      "epoch": 0.17297388516221576,
      "grad_norm": 0.0005954731605015695,
      "learning_rate": 4.085322976219802e-05,
      "loss": 46.0,
      "step": 1025
    },
    {
      "epoch": 0.1731426401721301,
      "grad_norm": 0.0009544877684675157,
      "learning_rate": 4.0678306667616606e-05,
      "loss": 46.0,
      "step": 1026
    },
    {
      "epoch": 0.17331139518204447,
      "grad_norm": 0.0006944774650037289,
      "learning_rate": 4.050366319807052e-05,
      "loss": 46.0,
      "step": 1027
    },
    {
      "epoch": 0.17348015019195884,
      "grad_norm": 0.002084943698719144,
      "learning_rate": 4.0329300176777775e-05,
      "loss": 46.0,
      "step": 1028
    },
    {
      "epoch": 0.17364890520187318,
      "grad_norm": 0.002356353448703885,
      "learning_rate": 4.015521842563444e-05,
      "loss": 46.0,
      "step": 1029
    },
    {
      "epoch": 0.17381766021178754,
      "grad_norm": 0.0005482649430632591,
      "learning_rate": 3.998141876521074e-05,
      "loss": 46.0,
      "step": 1030
    },
    {
      "epoch": 0.17398641522170188,
      "grad_norm": 0.0008993589435704052,
      "learning_rate": 3.980790201474723e-05,
      "loss": 46.0,
      "step": 1031
    },
    {
      "epoch": 0.17415517023161625,
      "grad_norm": 0.0011524191359058022,
      "learning_rate": 3.9634668992150895e-05,
      "loss": 46.0,
      "step": 1032
    },
    {
      "epoch": 0.17432392524153062,
      "grad_norm": 0.0006604307563975453,
      "learning_rate": 3.9461720513991285e-05,
      "loss": 46.0,
      "step": 1033
    },
    {
      "epoch": 0.17449268025144496,
      "grad_norm": 0.0013253578217700124,
      "learning_rate": 3.928905739549675e-05,
      "loss": 46.0,
      "step": 1034
    },
    {
      "epoch": 0.17466143526135933,
      "grad_norm": 0.0005015510250814259,
      "learning_rate": 3.911668045055047e-05,
      "loss": 46.0,
      "step": 1035
    },
    {
      "epoch": 0.17483019027127367,
      "grad_norm": 0.0005467745941132307,
      "learning_rate": 3.8944590491686816e-05,
      "loss": 46.0,
      "step": 1036
    },
    {
      "epoch": 0.17499894528118803,
      "grad_norm": 0.0007600914686918259,
      "learning_rate": 3.87727883300872e-05,
      "loss": 46.0,
      "step": 1037
    },
    {
      "epoch": 0.1751677002911024,
      "grad_norm": 0.0011487057199701667,
      "learning_rate": 3.860127477557654e-05,
      "loss": 46.0,
      "step": 1038
    },
    {
      "epoch": 0.17533645530101674,
      "grad_norm": 0.003172243246808648,
      "learning_rate": 3.8430050636619406e-05,
      "loss": 46.0,
      "step": 1039
    },
    {
      "epoch": 0.1755052103109311,
      "grad_norm": 0.0005271739210002124,
      "learning_rate": 3.825911672031611e-05,
      "loss": 46.0,
      "step": 1040
    },
    {
      "epoch": 0.17567396532084545,
      "grad_norm": 0.001015481655485928,
      "learning_rate": 3.8088473832398806e-05,
      "loss": 46.0,
      "step": 1041
    },
    {
      "epoch": 0.17584272033075982,
      "grad_norm": 0.0004703805025201291,
      "learning_rate": 3.7918122777228026e-05,
      "loss": 46.0,
      "step": 1042
    },
    {
      "epoch": 0.17601147534067418,
      "grad_norm": 0.001103199552744627,
      "learning_rate": 3.774806435778858e-05,
      "loss": 46.0,
      "step": 1043
    },
    {
      "epoch": 0.17618023035058852,
      "grad_norm": 0.0005269388784654438,
      "learning_rate": 3.757829937568591e-05,
      "loss": 46.0,
      "step": 1044
    },
    {
      "epoch": 0.1763489853605029,
      "grad_norm": 0.0017238155705854297,
      "learning_rate": 3.7408828631142237e-05,
      "loss": 46.0,
      "step": 1045
    },
    {
      "epoch": 0.17651774037041726,
      "grad_norm": 0.0006927169160917401,
      "learning_rate": 3.7239652922992894e-05,
      "loss": 46.0,
      "step": 1046
    },
    {
      "epoch": 0.1766864953803316,
      "grad_norm": 0.002759503899142146,
      "learning_rate": 3.707077304868245e-05,
      "loss": 46.0,
      "step": 1047
    },
    {
      "epoch": 0.17685525039024597,
      "grad_norm": 0.0005645275814458728,
      "learning_rate": 3.690218980426101e-05,
      "loss": 46.0,
      "step": 1048
    },
    {
      "epoch": 0.1770240054001603,
      "grad_norm": 0.00040731692570261657,
      "learning_rate": 3.6733903984380545e-05,
      "loss": 46.0,
      "step": 1049
    },
    {
      "epoch": 0.17719276041007467,
      "grad_norm": 0.0008958344697020948,
      "learning_rate": 3.656591638229088e-05,
      "loss": 46.0,
      "step": 1050
    },
    {
      "epoch": 0.17736151541998904,
      "grad_norm": 0.0009003482409752905,
      "learning_rate": 3.63982277898362e-05,
      "loss": 46.0,
      "step": 1051
    },
    {
      "epoch": 0.17753027042990338,
      "grad_norm": 0.00033022521529346704,
      "learning_rate": 3.6230838997451365e-05,
      "loss": 46.0,
      "step": 1052
    },
    {
      "epoch": 0.17769902543981775,
      "grad_norm": 0.0003871631342917681,
      "learning_rate": 3.606375079415797e-05,
      "loss": 46.0,
      "step": 1053
    },
    {
      "epoch": 0.1778677804497321,
      "grad_norm": 0.0018638977780938148,
      "learning_rate": 3.589696396756058e-05,
      "loss": 46.0,
      "step": 1054
    },
    {
      "epoch": 0.17803653545964646,
      "grad_norm": 0.0007231778581626713,
      "learning_rate": 3.5730479303843446e-05,
      "loss": 46.0,
      "step": 1055
    },
    {
      "epoch": 0.17820529046956082,
      "grad_norm": 0.001085842028260231,
      "learning_rate": 3.556429758776629e-05,
      "loss": 46.0,
      "step": 1056
    },
    {
      "epoch": 0.17837404547947516,
      "grad_norm": 0.0005750608397647738,
      "learning_rate": 3.5398419602660935e-05,
      "loss": 46.0,
      "step": 1057
    },
    {
      "epoch": 0.17854280048938953,
      "grad_norm": 0.0007418044842779636,
      "learning_rate": 3.523284613042745e-05,
      "loss": 46.0,
      "step": 1058
    },
    {
      "epoch": 0.1787115554993039,
      "grad_norm": 0.004355406854301691,
      "learning_rate": 3.506757795153056e-05,
      "loss": 46.0,
      "step": 1059
    },
    {
      "epoch": 0.17888031050921824,
      "grad_norm": 0.0003858884156215936,
      "learning_rate": 3.49026158449959e-05,
      "loss": 46.0,
      "step": 1060
    },
    {
      "epoch": 0.1790490655191326,
      "grad_norm": 0.0006274758488871157,
      "learning_rate": 3.4737960588406374e-05,
      "loss": 46.0,
      "step": 1061
    },
    {
      "epoch": 0.17921782052904695,
      "grad_norm": 0.001342911273241043,
      "learning_rate": 3.457361295789849e-05,
      "loss": 46.0,
      "step": 1062
    },
    {
      "epoch": 0.1793865755389613,
      "grad_norm": 0.0006192997680045664,
      "learning_rate": 3.440957372815872e-05,
      "loss": 46.0,
      "step": 1063
    },
    {
      "epoch": 0.17955533054887568,
      "grad_norm": 0.0011153030209243298,
      "learning_rate": 3.424584367241973e-05,
      "loss": 46.0,
      "step": 1064
    },
    {
      "epoch": 0.17972408555879002,
      "grad_norm": 0.0015017263358458877,
      "learning_rate": 3.408242356245701e-05,
      "loss": 46.0,
      "step": 1065
    },
    {
      "epoch": 0.1798928405687044,
      "grad_norm": 0.0004973539616912603,
      "learning_rate": 3.3919314168584924e-05,
      "loss": 46.0,
      "step": 1066
    },
    {
      "epoch": 0.18006159557861873,
      "grad_norm": 0.00341922789812088,
      "learning_rate": 3.375651625965315e-05,
      "loss": 46.0,
      "step": 1067
    },
    {
      "epoch": 0.1802303505885331,
      "grad_norm": 0.0006390162161551416,
      "learning_rate": 3.3594030603043304e-05,
      "loss": 46.0,
      "step": 1068
    },
    {
      "epoch": 0.18039910559844746,
      "grad_norm": 0.00042940647108480334,
      "learning_rate": 3.3431857964665003e-05,
      "loss": 46.0,
      "step": 1069
    },
    {
      "epoch": 0.1805678606083618,
      "grad_norm": 0.0022605019621551037,
      "learning_rate": 3.326999910895242e-05,
      "loss": 46.0,
      "step": 1070
    },
    {
      "epoch": 0.18073661561827617,
      "grad_norm": 0.0012887527700513601,
      "learning_rate": 3.3108454798860656e-05,
      "loss": 46.0,
      "step": 1071
    },
    {
      "epoch": 0.18090537062819054,
      "grad_norm": 0.0010164948180317879,
      "learning_rate": 3.2947225795862125e-05,
      "loss": 46.0,
      "step": 1072
    },
    {
      "epoch": 0.18107412563810488,
      "grad_norm": 0.002262418856844306,
      "learning_rate": 3.278631285994298e-05,
      "loss": 46.0,
      "step": 1073
    },
    {
      "epoch": 0.18124288064801924,
      "grad_norm": 0.00039437160012312233,
      "learning_rate": 3.2625716749599546e-05,
      "loss": 46.0,
      "step": 1074
    },
    {
      "epoch": 0.18141163565793358,
      "grad_norm": 0.0006692737806588411,
      "learning_rate": 3.246543822183469e-05,
      "loss": 46.0,
      "step": 1075
    },
    {
      "epoch": 0.18158039066784795,
      "grad_norm": 0.0014356159372255206,
      "learning_rate": 3.2305478032154324e-05,
      "loss": 46.0,
      "step": 1076
    },
    {
      "epoch": 0.18174914567776232,
      "grad_norm": 0.001208776026032865,
      "learning_rate": 3.2145836934563745e-05,
      "loss": 46.0,
      "step": 1077
    },
    {
      "epoch": 0.18191790068767666,
      "grad_norm": 0.0006577944732271135,
      "learning_rate": 3.198651568156426e-05,
      "loss": 46.0,
      "step": 1078
    },
    {
      "epoch": 0.18208665569759103,
      "grad_norm": 0.0010622587287798524,
      "learning_rate": 3.1827515024149445e-05,
      "loss": 46.0,
      "step": 1079
    },
    {
      "epoch": 0.18225541070750537,
      "grad_norm": 0.0010634633945301175,
      "learning_rate": 3.166883571180159e-05,
      "loss": 46.0,
      "step": 1080
    },
    {
      "epoch": 0.18242416571741973,
      "grad_norm": 0.0010152349714189768,
      "learning_rate": 3.151047849248847e-05,
      "loss": 46.0,
      "step": 1081
    },
    {
      "epoch": 0.1825929207273341,
      "grad_norm": 0.0008210540981963277,
      "learning_rate": 3.1352444112659484e-05,
      "loss": 46.0,
      "step": 1082
    },
    {
      "epoch": 0.18276167573724844,
      "grad_norm": 0.0006314092315733433,
      "learning_rate": 3.119473331724219e-05,
      "loss": 46.0,
      "step": 1083
    },
    {
      "epoch": 0.1829304307471628,
      "grad_norm": 0.0015025155153125525,
      "learning_rate": 3.103734684963902e-05,
      "loss": 46.0,
      "step": 1084
    },
    {
      "epoch": 0.18309918575707718,
      "grad_norm": 0.0008753265137784183,
      "learning_rate": 3.088028545172352e-05,
      "loss": 46.0,
      "step": 1085
    },
    {
      "epoch": 0.18326794076699152,
      "grad_norm": 0.0014098742976784706,
      "learning_rate": 3.0723549863836996e-05,
      "loss": 46.0,
      "step": 1086
    },
    {
      "epoch": 0.18343669577690588,
      "grad_norm": 0.0014926984440535307,
      "learning_rate": 3.056714082478496e-05,
      "loss": 46.0,
      "step": 1087
    },
    {
      "epoch": 0.18360545078682022,
      "grad_norm": 0.0008304324583150446,
      "learning_rate": 3.0411059071833668e-05,
      "loss": 46.0,
      "step": 1088
    },
    {
      "epoch": 0.1837742057967346,
      "grad_norm": 0.0004860296321567148,
      "learning_rate": 3.025530534070664e-05,
      "loss": 46.0,
      "step": 1089
    },
    {
      "epoch": 0.18394296080664896,
      "grad_norm": 0.0006332461489364505,
      "learning_rate": 3.00998803655812e-05,
      "loss": 46.0,
      "step": 1090
    },
    {
      "epoch": 0.1841117158165633,
      "grad_norm": 0.0013554253382608294,
      "learning_rate": 2.9944784879085065e-05,
      "loss": 46.0,
      "step": 1091
    },
    {
      "epoch": 0.18428047082647767,
      "grad_norm": 0.0008776708273217082,
      "learning_rate": 2.979001961229281e-05,
      "loss": 46.0,
      "step": 1092
    },
    {
      "epoch": 0.184449225836392,
      "grad_norm": 0.001277380739338696,
      "learning_rate": 2.9635585294722336e-05,
      "loss": 46.0,
      "step": 1093
    },
    {
      "epoch": 0.18461798084630637,
      "grad_norm": 0.000868733914103359,
      "learning_rate": 2.948148265433174e-05,
      "loss": 46.0,
      "step": 1094
    },
    {
      "epoch": 0.18478673585622074,
      "grad_norm": 0.0004029095871374011,
      "learning_rate": 2.9327712417515597e-05,
      "loss": 46.0,
      "step": 1095
    },
    {
      "epoch": 0.18478673585622074,
      "eval_loss": 11.5,
      "eval_runtime": 14.1998,
      "eval_samples_per_second": 175.707,
      "eval_steps_per_second": 87.888,
      "step": 1095
    },
    {
      "epoch": 0.18495549086613508,
      "grad_norm": 0.0018527479842305183,
      "learning_rate": 2.917427530910154e-05,
      "loss": 46.0,
      "step": 1096
    },
    {
      "epoch": 0.18512424587604945,
      "grad_norm": 0.000325352099025622,
      "learning_rate": 2.9021172052347113e-05,
      "loss": 46.0,
      "step": 1097
    },
    {
      "epoch": 0.18529300088596382,
      "grad_norm": 0.0008033345802687109,
      "learning_rate": 2.886840336893606e-05,
      "loss": 46.0,
      "step": 1098
    },
    {
      "epoch": 0.18546175589587816,
      "grad_norm": 0.003062557429075241,
      "learning_rate": 2.871596997897509e-05,
      "loss": 46.0,
      "step": 1099
    },
    {
      "epoch": 0.18563051090579252,
      "grad_norm": 0.0005990050849504769,
      "learning_rate": 2.8563872600990394e-05,
      "loss": 46.0,
      "step": 1100
    },
    {
      "epoch": 0.18579926591570686,
      "grad_norm": 0.00030623353086411953,
      "learning_rate": 2.841211195192436e-05,
      "loss": 46.0,
      "step": 1101
    },
    {
      "epoch": 0.18596802092562123,
      "grad_norm": 0.0005539836129173636,
      "learning_rate": 2.826068874713208e-05,
      "loss": 46.0,
      "step": 1102
    },
    {
      "epoch": 0.1861367759355356,
      "grad_norm": 0.0013511950382962823,
      "learning_rate": 2.8109603700378074e-05,
      "loss": 46.0,
      "step": 1103
    },
    {
      "epoch": 0.18630553094544994,
      "grad_norm": 0.0009734542109072208,
      "learning_rate": 2.795885752383284e-05,
      "loss": 46.0,
      "step": 1104
    },
    {
      "epoch": 0.1864742859553643,
      "grad_norm": 0.000757011875975877,
      "learning_rate": 2.780845092806964e-05,
      "loss": 46.0,
      "step": 1105
    },
    {
      "epoch": 0.18664304096527864,
      "grad_norm": 0.0009687045239843428,
      "learning_rate": 2.7658384622060873e-05,
      "loss": 46.0,
      "step": 1106
    },
    {
      "epoch": 0.186811795975193,
      "grad_norm": 0.0008338790503330529,
      "learning_rate": 2.7508659313175112e-05,
      "loss": 46.0,
      "step": 1107
    },
    {
      "epoch": 0.18698055098510738,
      "grad_norm": 0.000879534287378192,
      "learning_rate": 2.735927570717348e-05,
      "loss": 46.0,
      "step": 1108
    },
    {
      "epoch": 0.18714930599502172,
      "grad_norm": 0.000895138131454587,
      "learning_rate": 2.7210234508206313e-05,
      "loss": 46.0,
      "step": 1109
    },
    {
      "epoch": 0.1873180610049361,
      "grad_norm": 0.0016813467955216765,
      "learning_rate": 2.706153641881013e-05,
      "loss": 46.0,
      "step": 1110
    },
    {
      "epoch": 0.18748681601485043,
      "grad_norm": 0.00046861334703862667,
      "learning_rate": 2.6913182139904014e-05,
      "loss": 46.0,
      "step": 1111
    },
    {
      "epoch": 0.1876555710247648,
      "grad_norm": 0.0008181455777958035,
      "learning_rate": 2.6765172370786463e-05,
      "loss": 46.0,
      "step": 1112
    },
    {
      "epoch": 0.18782432603467916,
      "grad_norm": 0.000511069200001657,
      "learning_rate": 2.6617507809132038e-05,
      "loss": 46.0,
      "step": 1113
    },
    {
      "epoch": 0.1879930810445935,
      "grad_norm": 0.0009120728354901075,
      "learning_rate": 2.647018915098809e-05,
      "loss": 46.0,
      "step": 1114
    },
    {
      "epoch": 0.18816183605450787,
      "grad_norm": 0.0011518978280946612,
      "learning_rate": 2.6323217090771512e-05,
      "loss": 46.0,
      "step": 1115
    },
    {
      "epoch": 0.18833059106442224,
      "grad_norm": 0.001370842568576336,
      "learning_rate": 2.6176592321265425e-05,
      "loss": 46.0,
      "step": 1116
    },
    {
      "epoch": 0.18849934607433658,
      "grad_norm": 0.002501879585906863,
      "learning_rate": 2.6030315533615878e-05,
      "loss": 46.0,
      "step": 1117
    },
    {
      "epoch": 0.18866810108425094,
      "grad_norm": 0.0005397344939410686,
      "learning_rate": 2.588438741732876e-05,
      "loss": 46.0,
      "step": 1118
    },
    {
      "epoch": 0.18883685609416528,
      "grad_norm": 0.0013044921215623617,
      "learning_rate": 2.5738808660266235e-05,
      "loss": 46.0,
      "step": 1119
    },
    {
      "epoch": 0.18900561110407965,
      "grad_norm": 0.0007338232826441526,
      "learning_rate": 2.5593579948643874e-05,
      "loss": 46.0,
      "step": 1120
    },
    {
      "epoch": 0.18917436611399402,
      "grad_norm": 0.0011662261094897985,
      "learning_rate": 2.5448701967027167e-05,
      "loss": 46.0,
      "step": 1121
    },
    {
      "epoch": 0.18934312112390836,
      "grad_norm": 0.00048239415627904236,
      "learning_rate": 2.5304175398328278e-05,
      "loss": 46.0,
      "step": 1122
    },
    {
      "epoch": 0.18951187613382273,
      "grad_norm": 0.002095632255077362,
      "learning_rate": 2.5160000923803063e-05,
      "loss": 46.0,
      "step": 1123
    },
    {
      "epoch": 0.18968063114373707,
      "grad_norm": 0.00038515665801241994,
      "learning_rate": 2.501617922304763e-05,
      "loss": 46.0,
      "step": 1124
    },
    {
      "epoch": 0.18984938615365143,
      "grad_norm": 0.0008104884182102978,
      "learning_rate": 2.4872710973995204e-05,
      "loss": 46.0,
      "step": 1125
    },
    {
      "epoch": 0.1900181411635658,
      "grad_norm": 0.0006576053565368056,
      "learning_rate": 2.4729596852912996e-05,
      "loss": 46.0,
      "step": 1126
    },
    {
      "epoch": 0.19018689617348014,
      "grad_norm": 0.0006934543489478528,
      "learning_rate": 2.4586837534398898e-05,
      "loss": 46.0,
      "step": 1127
    },
    {
      "epoch": 0.1903556511833945,
      "grad_norm": 0.0016504209488630295,
      "learning_rate": 2.444443369137841e-05,
      "loss": 46.0,
      "step": 1128
    },
    {
      "epoch": 0.19052440619330888,
      "grad_norm": 0.0016103170346468687,
      "learning_rate": 2.4302385995101417e-05,
      "loss": 46.0,
      "step": 1129
    },
    {
      "epoch": 0.19069316120322322,
      "grad_norm": 0.0009479466243647039,
      "learning_rate": 2.4160695115138998e-05,
      "loss": 46.0,
      "step": 1130
    },
    {
      "epoch": 0.19086191621313758,
      "grad_norm": 0.0006660729413852096,
      "learning_rate": 2.401936171938043e-05,
      "loss": 46.0,
      "step": 1131
    },
    {
      "epoch": 0.19103067122305192,
      "grad_norm": 0.002802348928526044,
      "learning_rate": 2.3878386474029724e-05,
      "loss": 46.0,
      "step": 1132
    },
    {
      "epoch": 0.1911994262329663,
      "grad_norm": 0.0021046160254627466,
      "learning_rate": 2.373777004360278e-05,
      "loss": 46.0,
      "step": 1133
    },
    {
      "epoch": 0.19136818124288066,
      "grad_norm": 0.0005733026191592216,
      "learning_rate": 2.3597513090924228e-05,
      "loss": 46.0,
      "step": 1134
    },
    {
      "epoch": 0.191536936252795,
      "grad_norm": 0.001300279633142054,
      "learning_rate": 2.3457616277124018e-05,
      "loss": 46.0,
      "step": 1135
    },
    {
      "epoch": 0.19170569126270937,
      "grad_norm": 0.0020821443758904934,
      "learning_rate": 2.3318080261634723e-05,
      "loss": 46.0,
      "step": 1136
    },
    {
      "epoch": 0.1918744462726237,
      "grad_norm": 0.0010038736509159207,
      "learning_rate": 2.3178905702188124e-05,
      "loss": 46.0,
      "step": 1137
    },
    {
      "epoch": 0.19204320128253807,
      "grad_norm": 0.0026241031009703875,
      "learning_rate": 2.3040093254812123e-05,
      "loss": 46.0,
      "step": 1138
    },
    {
      "epoch": 0.19221195629245244,
      "grad_norm": 0.0008017763611860573,
      "learning_rate": 2.2901643573827914e-05,
      "loss": 46.0,
      "step": 1139
    },
    {
      "epoch": 0.19238071130236678,
      "grad_norm": 0.0006083068437874317,
      "learning_rate": 2.2763557311846583e-05,
      "loss": 46.0,
      "step": 1140
    },
    {
      "epoch": 0.19254946631228115,
      "grad_norm": 0.0014171472284942865,
      "learning_rate": 2.2625835119766202e-05,
      "loss": 46.0,
      "step": 1141
    },
    {
      "epoch": 0.19271822132219552,
      "grad_norm": 0.00046038886648602784,
      "learning_rate": 2.2488477646768724e-05,
      "loss": 46.0,
      "step": 1142
    },
    {
      "epoch": 0.19288697633210986,
      "grad_norm": 0.0006552223348990083,
      "learning_rate": 2.235148554031694e-05,
      "loss": 46.0,
      "step": 1143
    },
    {
      "epoch": 0.19305573134202422,
      "grad_norm": 0.0009186275419779122,
      "learning_rate": 2.221485944615137e-05,
      "loss": 46.0,
      "step": 1144
    },
    {
      "epoch": 0.19322448635193856,
      "grad_norm": 0.0005543860606849194,
      "learning_rate": 2.207860000828731e-05,
      "loss": 46.0,
      "step": 1145
    },
    {
      "epoch": 0.19339324136185293,
      "grad_norm": 0.0007307975320145488,
      "learning_rate": 2.1942707869011682e-05,
      "loss": 46.0,
      "step": 1146
    },
    {
      "epoch": 0.1935619963717673,
      "grad_norm": 0.0009457955020479858,
      "learning_rate": 2.1807183668880194e-05,
      "loss": 46.0,
      "step": 1147
    },
    {
      "epoch": 0.19373075138168164,
      "grad_norm": 0.0009510859963484108,
      "learning_rate": 2.1672028046713998e-05,
      "loss": 46.0,
      "step": 1148
    },
    {
      "epoch": 0.193899506391596,
      "grad_norm": 0.0012068504001945257,
      "learning_rate": 2.1537241639597082e-05,
      "loss": 46.0,
      "step": 1149
    },
    {
      "epoch": 0.19406826140151034,
      "grad_norm": 0.0008088087779469788,
      "learning_rate": 2.1402825082872968e-05,
      "loss": 46.0,
      "step": 1150
    },
    {
      "epoch": 0.1942370164114247,
      "grad_norm": 0.00042066810419782996,
      "learning_rate": 2.1268779010141737e-05,
      "loss": 46.0,
      "step": 1151
    },
    {
      "epoch": 0.19440577142133908,
      "grad_norm": 0.0008168341009877622,
      "learning_rate": 2.1135104053257272e-05,
      "loss": 46.0,
      "step": 1152
    },
    {
      "epoch": 0.19457452643125342,
      "grad_norm": 0.0011002906830981374,
      "learning_rate": 2.1001800842324026e-05,
      "loss": 46.0,
      "step": 1153
    },
    {
      "epoch": 0.1947432814411678,
      "grad_norm": 0.0006414004019461572,
      "learning_rate": 2.0868870005694173e-05,
      "loss": 46.0,
      "step": 1154
    },
    {
      "epoch": 0.19491203645108215,
      "grad_norm": 0.0005531748756766319,
      "learning_rate": 2.0736312169964635e-05,
      "loss": 46.0,
      "step": 1155
    },
    {
      "epoch": 0.1950807914609965,
      "grad_norm": 0.000641561346128583,
      "learning_rate": 2.0604127959974107e-05,
      "loss": 46.0,
      "step": 1156
    },
    {
      "epoch": 0.19524954647091086,
      "grad_norm": 0.0006983129424043,
      "learning_rate": 2.0472317998800106e-05,
      "loss": 46.0,
      "step": 1157
    },
    {
      "epoch": 0.1954183014808252,
      "grad_norm": 0.0006936094141565263,
      "learning_rate": 2.0340882907756076e-05,
      "loss": 46.0,
      "step": 1158
    },
    {
      "epoch": 0.19558705649073957,
      "grad_norm": 0.0006084066699258983,
      "learning_rate": 2.020982330638841e-05,
      "loss": 46.0,
      "step": 1159
    },
    {
      "epoch": 0.19575581150065394,
      "grad_norm": 0.004117981996387243,
      "learning_rate": 2.0079139812473636e-05,
      "loss": 46.0,
      "step": 1160
    },
    {
      "epoch": 0.19592456651056828,
      "grad_norm": 0.0016725576715543866,
      "learning_rate": 1.994883304201527e-05,
      "loss": 46.0,
      "step": 1161
    },
    {
      "epoch": 0.19609332152048264,
      "grad_norm": 0.000729929655790329,
      "learning_rate": 1.981890360924116e-05,
      "loss": 46.0,
      "step": 1162
    },
    {
      "epoch": 0.19626207653039698,
      "grad_norm": 0.0005720785120502114,
      "learning_rate": 1.9689352126600536e-05,
      "loss": 46.0,
      "step": 1163
    },
    {
      "epoch": 0.19643083154031135,
      "grad_norm": 0.0008306180825456977,
      "learning_rate": 1.9560179204760954e-05,
      "loss": 46.0,
      "step": 1164
    },
    {
      "epoch": 0.19659958655022572,
      "grad_norm": 0.0006977242301218212,
      "learning_rate": 1.943138545260569e-05,
      "loss": 46.0,
      "step": 1165
    },
    {
      "epoch": 0.19676834156014006,
      "grad_norm": 0.0005357645568437874,
      "learning_rate": 1.9302971477230613e-05,
      "loss": 46.0,
      "step": 1166
    },
    {
      "epoch": 0.19693709657005443,
      "grad_norm": 0.0009179019252769649,
      "learning_rate": 1.9174937883941503e-05,
      "loss": 46.0,
      "step": 1167
    },
    {
      "epoch": 0.1971058515799688,
      "grad_norm": 0.000713715679012239,
      "learning_rate": 1.904728527625108e-05,
      "loss": 46.0,
      "step": 1168
    },
    {
      "epoch": 0.19727460658988313,
      "grad_norm": 0.0006716207135468721,
      "learning_rate": 1.8920014255876263e-05,
      "loss": 46.0,
      "step": 1169
    },
    {
      "epoch": 0.1974433615997975,
      "grad_norm": 0.0018655718304216862,
      "learning_rate": 1.8793125422735235e-05,
      "loss": 46.0,
      "step": 1170
    },
    {
      "epoch": 0.19761211660971184,
      "grad_norm": 0.0017995084635913372,
      "learning_rate": 1.8666619374944684e-05,
      "loss": 46.0,
      "step": 1171
    },
    {
      "epoch": 0.1977808716196262,
      "grad_norm": 0.001270498731173575,
      "learning_rate": 1.8540496708816935e-05,
      "loss": 46.0,
      "step": 1172
    },
    {
      "epoch": 0.19794962662954058,
      "grad_norm": 0.0005029829917475581,
      "learning_rate": 1.8414758018857248e-05,
      "loss": 46.0,
      "step": 1173
    },
    {
      "epoch": 0.19811838163945492,
      "grad_norm": 0.0007416327716782689,
      "learning_rate": 1.8289403897760815e-05,
      "loss": 46.0,
      "step": 1174
    },
    {
      "epoch": 0.19828713664936928,
      "grad_norm": 0.0004362264589872211,
      "learning_rate": 1.8164434936410114e-05,
      "loss": 46.0,
      "step": 1175
    },
    {
      "epoch": 0.19845589165928362,
      "grad_norm": 0.0005498372483998537,
      "learning_rate": 1.8039851723872204e-05,
      "loss": 46.0,
      "step": 1176
    },
    {
      "epoch": 0.198624646669198,
      "grad_norm": 0.0005490055773407221,
      "learning_rate": 1.7915654847395646e-05,
      "loss": 46.0,
      "step": 1177
    },
    {
      "epoch": 0.19879340167911236,
      "grad_norm": 0.0007399431779049337,
      "learning_rate": 1.7791844892408104e-05,
      "loss": 46.0,
      "step": 1178
    },
    {
      "epoch": 0.1989621566890267,
      "grad_norm": 0.00065398751758039,
      "learning_rate": 1.766842244251332e-05,
      "loss": 46.0,
      "step": 1179
    },
    {
      "epoch": 0.19913091169894107,
      "grad_norm": 0.0003902087628375739,
      "learning_rate": 1.7545388079488455e-05,
      "loss": 46.0,
      "step": 1180
    },
    {
      "epoch": 0.19929966670885543,
      "grad_norm": 0.0006421052967198193,
      "learning_rate": 1.7422742383281355e-05,
      "loss": 46.0,
      "step": 1181
    },
    {
      "epoch": 0.19946842171876977,
      "grad_norm": 0.000821156136225909,
      "learning_rate": 1.7300485932007794e-05,
      "loss": 46.0,
      "step": 1182
    },
    {
      "epoch": 0.19963717672868414,
      "grad_norm": 0.000910441973246634,
      "learning_rate": 1.7178619301948774e-05,
      "loss": 46.0,
      "step": 1183
    },
    {
      "epoch": 0.19980593173859848,
      "grad_norm": 0.0008174055255949497,
      "learning_rate": 1.7057143067547788e-05,
      "loss": 46.0,
      "step": 1184
    },
    {
      "epoch": 0.19997468674851285,
      "grad_norm": 0.0007186917937360704,
      "learning_rate": 1.69360578014081e-05,
      "loss": 46.0,
      "step": 1185
    },
    {
      "epoch": 0.20014344175842722,
      "grad_norm": 0.000985615304671228,
      "learning_rate": 1.6815364074290153e-05,
      "loss": 46.0,
      "step": 1186
    },
    {
      "epoch": 0.20031219676834155,
      "grad_norm": 0.0004897791077382863,
      "learning_rate": 1.6695062455108646e-05,
      "loss": 46.0,
      "step": 1187
    },
    {
      "epoch": 0.20048095177825592,
      "grad_norm": 0.0006414767703972757,
      "learning_rate": 1.6575153510930065e-05,
      "loss": 46.0,
      "step": 1188
    },
    {
      "epoch": 0.20064970678817026,
      "grad_norm": 0.00031621320522390306,
      "learning_rate": 1.6455637806970027e-05,
      "loss": 46.0,
      "step": 1189
    },
    {
      "epoch": 0.20081846179808463,
      "grad_norm": 0.004970474634319544,
      "learning_rate": 1.6336515906590354e-05,
      "loss": 46.0,
      "step": 1190
    },
    {
      "epoch": 0.200987216807999,
      "grad_norm": 0.0018685179529711604,
      "learning_rate": 1.621778837129676e-05,
      "loss": 46.0,
      "step": 1191
    },
    {
      "epoch": 0.20115597181791334,
      "grad_norm": 0.0012024708557873964,
      "learning_rate": 1.6099455760735937e-05,
      "loss": 46.0,
      "step": 1192
    },
    {
      "epoch": 0.2013247268278277,
      "grad_norm": 0.0004908541450276971,
      "learning_rate": 1.598151863269306e-05,
      "loss": 46.0,
      "step": 1193
    },
    {
      "epoch": 0.20149348183774204,
      "grad_norm": 0.0007760179578326643,
      "learning_rate": 1.5863977543089092e-05,
      "loss": 46.0,
      "step": 1194
    },
    {
      "epoch": 0.2016622368476564,
      "grad_norm": 0.00047840975457802415,
      "learning_rate": 1.5746833045978216e-05,
      "loss": 46.0,
      "step": 1195
    },
    {
      "epoch": 0.20183099185757078,
      "grad_norm": 0.0011836383491754532,
      "learning_rate": 1.563008569354516e-05,
      "loss": 46.0,
      "step": 1196
    },
    {
      "epoch": 0.20199974686748512,
      "grad_norm": 0.000663977290969342,
      "learning_rate": 1.5513736036102644e-05,
      "loss": 46.0,
      "step": 1197
    },
    {
      "epoch": 0.2021685018773995,
      "grad_norm": 0.0010960629442706704,
      "learning_rate": 1.5397784622088772e-05,
      "loss": 46.0,
      "step": 1198
    },
    {
      "epoch": 0.20233725688731385,
      "grad_norm": 0.000916484568733722,
      "learning_rate": 1.52822319980645e-05,
      "loss": 46.0,
      "step": 1199
    },
    {
      "epoch": 0.2025060118972282,
      "grad_norm": 0.0016858198214322329,
      "learning_rate": 1.516707870871089e-05,
      "loss": 46.0,
      "step": 1200
    },
    {
      "epoch": 0.20267476690714256,
      "grad_norm": 0.0004934574826620519,
      "learning_rate": 1.5052325296826708e-05,
      "loss": 46.0,
      "step": 1201
    },
    {
      "epoch": 0.2028435219170569,
      "grad_norm": 0.0013131732121109962,
      "learning_rate": 1.4937972303325909e-05,
      "loss": 46.0,
      "step": 1202
    },
    {
      "epoch": 0.20301227692697127,
      "grad_norm": 0.0024639791809022427,
      "learning_rate": 1.4824020267234828e-05,
      "loss": 46.0,
      "step": 1203
    },
    {
      "epoch": 0.20318103193688564,
      "grad_norm": 0.001286798040382564,
      "learning_rate": 1.471046972568989e-05,
      "loss": 46.0,
      "step": 1204
    },
    {
      "epoch": 0.20334978694679998,
      "grad_norm": 0.0017689433880150318,
      "learning_rate": 1.4597321213935045e-05,
      "loss": 46.0,
      "step": 1205
    },
    {
      "epoch": 0.20351854195671434,
      "grad_norm": 0.0009689538856036961,
      "learning_rate": 1.4484575265319112e-05,
      "loss": 46.0,
      "step": 1206
    },
    {
      "epoch": 0.20368729696662868,
      "grad_norm": 0.0009614164009690285,
      "learning_rate": 1.4372232411293373e-05,
      "loss": 46.0,
      "step": 1207
    },
    {
      "epoch": 0.20385605197654305,
      "grad_norm": 0.0008921355474740267,
      "learning_rate": 1.4260293181409023e-05,
      "loss": 46.0,
      "step": 1208
    },
    {
      "epoch": 0.20402480698645742,
      "grad_norm": 0.0005731172277592123,
      "learning_rate": 1.414875810331473e-05,
      "loss": 46.0,
      "step": 1209
    },
    {
      "epoch": 0.20419356199637176,
      "grad_norm": 0.0005115303792990744,
      "learning_rate": 1.4037627702754064e-05,
      "loss": 46.0,
      "step": 1210
    },
    {
      "epoch": 0.20436231700628613,
      "grad_norm": 0.0010284315794706345,
      "learning_rate": 1.3926902503563099e-05,
      "loss": 46.0,
      "step": 1211
    },
    {
      "epoch": 0.2045310720162005,
      "grad_norm": 0.0005095731467008591,
      "learning_rate": 1.3816583027667895e-05,
      "loss": 46.0,
      "step": 1212
    },
    {
      "epoch": 0.20469982702611483,
      "grad_norm": 0.0006683744722977281,
      "learning_rate": 1.370666979508206e-05,
      "loss": 46.0,
      "step": 1213
    },
    {
      "epoch": 0.2048685820360292,
      "grad_norm": 0.0013482188805937767,
      "learning_rate": 1.3597163323904238e-05,
      "loss": 46.0,
      "step": 1214
    },
    {
      "epoch": 0.20503733704594354,
      "grad_norm": 0.0006872873054817319,
      "learning_rate": 1.3488064130315858e-05,
      "loss": 46.0,
      "step": 1215
    },
    {
      "epoch": 0.2052060920558579,
      "grad_norm": 0.000505985866766423,
      "learning_rate": 1.3379372728578387e-05,
      "loss": 46.0,
      "step": 1216
    },
    {
      "epoch": 0.20537484706577228,
      "grad_norm": 0.0005020827520638704,
      "learning_rate": 1.3271089631031152e-05,
      "loss": 46.0,
      "step": 1217
    },
    {
      "epoch": 0.20554360207568662,
      "grad_norm": 0.0010777136776596308,
      "learning_rate": 1.316321534808893e-05,
      "loss": 46.0,
      "step": 1218
    },
    {
      "epoch": 0.20571235708560098,
      "grad_norm": 0.0018740965751931071,
      "learning_rate": 1.3055750388239374e-05,
      "loss": 46.0,
      "step": 1219
    },
    {
      "epoch": 0.20588111209551532,
      "grad_norm": 0.0017112314235419035,
      "learning_rate": 1.2948695258040734e-05,
      "loss": 46.0,
      "step": 1220
    },
    {
      "epoch": 0.2060498671054297,
      "grad_norm": 0.0021086453925818205,
      "learning_rate": 1.2842050462119426e-05,
      "loss": 46.0,
      "step": 1221
    },
    {
      "epoch": 0.20621862211534406,
      "grad_norm": 0.0006901904125697911,
      "learning_rate": 1.2735816503167708e-05,
      "loss": 46.0,
      "step": 1222
    },
    {
      "epoch": 0.2063873771252584,
      "grad_norm": 0.0009343160782009363,
      "learning_rate": 1.2629993881941249e-05,
      "loss": 46.0,
      "step": 1223
    },
    {
      "epoch": 0.20655613213517277,
      "grad_norm": 0.0008830896113067865,
      "learning_rate": 1.2524583097256793e-05,
      "loss": 46.0,
      "step": 1224
    },
    {
      "epoch": 0.20672488714508713,
      "grad_norm": 0.0018291313899680972,
      "learning_rate": 1.2419584645989823e-05,
      "loss": 46.0,
      "step": 1225
    },
    {
      "epoch": 0.20689364215500147,
      "grad_norm": 0.00036748184356838465,
      "learning_rate": 1.2314999023072182e-05,
      "loss": 46.0,
      "step": 1226
    },
    {
      "epoch": 0.20706239716491584,
      "grad_norm": 0.0010965716792270541,
      "learning_rate": 1.2210826721489765e-05,
      "loss": 46.0,
      "step": 1227
    },
    {
      "epoch": 0.20723115217483018,
      "grad_norm": 0.0008960228879004717,
      "learning_rate": 1.2107068232280238e-05,
      "loss": 46.0,
      "step": 1228
    },
    {
      "epoch": 0.20739990718474455,
      "grad_norm": 0.004184830002486706,
      "learning_rate": 1.2003724044530596e-05,
      "loss": 46.0,
      "step": 1229
    },
    {
      "epoch": 0.20756866219465891,
      "grad_norm": 0.0006089484668336809,
      "learning_rate": 1.190079464537498e-05,
      "loss": 46.0,
      "step": 1230
    },
    {
      "epoch": 0.20773741720457325,
      "grad_norm": 0.0005351770669221878,
      "learning_rate": 1.1798280519992366e-05,
      "loss": 46.0,
      "step": 1231
    },
    {
      "epoch": 0.20790617221448762,
      "grad_norm": 0.000453771062893793,
      "learning_rate": 1.169618215160424e-05,
      "loss": 46.0,
      "step": 1232
    },
    {
      "epoch": 0.20807492722440196,
      "grad_norm": 0.0010923264781013131,
      "learning_rate": 1.1594500021472243e-05,
      "loss": 46.0,
      "step": 1233
    },
    {
      "epoch": 0.20824368223431633,
      "grad_norm": 0.0011471729958429933,
      "learning_rate": 1.1493234608896141e-05,
      "loss": 46.0,
      "step": 1234
    },
    {
      "epoch": 0.2084124372442307,
      "grad_norm": 0.0009632937726564705,
      "learning_rate": 1.1392386391211307e-05,
      "loss": 46.0,
      "step": 1235
    },
    {
      "epoch": 0.20858119225414504,
      "grad_norm": 0.0008420674130320549,
      "learning_rate": 1.1291955843786617e-05,
      "loss": 46.0,
      "step": 1236
    },
    {
      "epoch": 0.2087499472640594,
      "grad_norm": 0.0013222956331446767,
      "learning_rate": 1.1191943440022179e-05,
      "loss": 46.0,
      "step": 1237
    },
    {
      "epoch": 0.20891870227397377,
      "grad_norm": 0.001955078449100256,
      "learning_rate": 1.1092349651347055e-05,
      "loss": 46.0,
      "step": 1238
    },
    {
      "epoch": 0.2090874572838881,
      "grad_norm": 0.0019749640487134457,
      "learning_rate": 1.0993174947217144e-05,
      "loss": 46.0,
      "step": 1239
    },
    {
      "epoch": 0.20925621229380248,
      "grad_norm": 0.0010516421170905232,
      "learning_rate": 1.089441979511282e-05,
      "loss": 46.0,
      "step": 1240
    },
    {
      "epoch": 0.20942496730371682,
      "grad_norm": 0.0008922962588258088,
      "learning_rate": 1.079608466053692e-05,
      "loss": 46.0,
      "step": 1241
    },
    {
      "epoch": 0.2095937223136312,
      "grad_norm": 0.0005941609852015972,
      "learning_rate": 1.0698170007012332e-05,
      "loss": 46.0,
      "step": 1242
    },
    {
      "epoch": 0.20976247732354555,
      "grad_norm": 0.0004414799332153052,
      "learning_rate": 1.060067629607996e-05,
      "loss": 46.0,
      "step": 1243
    },
    {
      "epoch": 0.2099312323334599,
      "grad_norm": 0.002375362440943718,
      "learning_rate": 1.0503603987296562e-05,
      "loss": 46.0,
      "step": 1244
    },
    {
      "epoch": 0.21009998734337426,
      "grad_norm": 0.0010825851932168007,
      "learning_rate": 1.0406953538232479e-05,
      "loss": 46.0,
      "step": 1245
    },
    {
      "epoch": 0.2102687423532886,
      "grad_norm": 0.0012761763064190745,
      "learning_rate": 1.0310725404469479e-05,
      "loss": 46.0,
      "step": 1246
    },
    {
      "epoch": 0.21043749736320297,
      "grad_norm": 0.0008807352278381586,
      "learning_rate": 1.0214920039598774e-05,
      "loss": 46.0,
      "step": 1247
    },
    {
      "epoch": 0.21060625237311734,
      "grad_norm": 0.0006370813935063779,
      "learning_rate": 1.011953789521869e-05,
      "loss": 46.0,
      "step": 1248
    },
    {
      "epoch": 0.21077500738303168,
      "grad_norm": 0.0006687640561722219,
      "learning_rate": 1.0024579420932633e-05,
      "loss": 46.0,
      "step": 1249
    },
    {
      "epoch": 0.21094376239294604,
      "grad_norm": 0.0018508404027670622,
      "learning_rate": 9.93004506434696e-06,
      "loss": 46.0,
      "step": 1250
    },
    {
      "epoch": 0.2111125174028604,
      "grad_norm": 0.00048345469986088574,
      "learning_rate": 9.835935271068842e-06,
      "loss": 46.0,
      "step": 1251
    },
    {
      "epoch": 0.21128127241277475,
      "grad_norm": 0.0006890186341479421,
      "learning_rate": 9.74225048470422e-06,
      "loss": 46.0,
      "step": 1252
    },
    {
      "epoch": 0.21145002742268912,
      "grad_norm": 0.0008637924329377711,
      "learning_rate": 9.648991146855636e-06,
      "loss": 46.0,
      "step": 1253
    },
    {
      "epoch": 0.21161878243260346,
      "grad_norm": 0.0007931495201773942,
      "learning_rate": 9.556157697120215e-06,
      "loss": 46.0,
      "step": 1254
    },
    {
      "epoch": 0.21178753744251783,
      "grad_norm": 0.0005852460162714124,
      "learning_rate": 9.463750573087571e-06,
      "loss": 46.0,
      "step": 1255
    },
    {
      "epoch": 0.2119562924524322,
      "grad_norm": 0.0008610963704995811,
      "learning_rate": 9.371770210337727e-06,
      "loss": 46.0,
      "step": 1256
    },
    {
      "epoch": 0.21212504746234653,
      "grad_norm": 0.000635376840364188,
      "learning_rate": 9.280217042439122e-06,
      "loss": 46.0,
      "step": 1257
    },
    {
      "epoch": 0.2122938024722609,
      "grad_norm": 0.0008849898586049676,
      "learning_rate": 9.18909150094649e-06,
      "loss": 46.0,
      "step": 1258
    },
    {
      "epoch": 0.21246255748217524,
      "grad_norm": 0.0008719837060198188,
      "learning_rate": 9.098394015398814e-06,
      "loss": 46.0,
      "step": 1259
    },
    {
      "epoch": 0.2126313124920896,
      "grad_norm": 0.0021515190601348877,
      "learning_rate": 9.008125013317448e-06,
      "loss": 46.0,
      "step": 1260
    },
    {
      "epoch": 0.21280006750200398,
      "grad_norm": 0.00268219574354589,
      "learning_rate": 8.918284920203934e-06,
      "loss": 46.0,
      "step": 1261
    },
    {
      "epoch": 0.21296882251191832,
      "grad_norm": 0.0009798301616683602,
      "learning_rate": 8.828874159538091e-06,
      "loss": 46.0,
      "step": 1262
    },
    {
      "epoch": 0.21313757752183268,
      "grad_norm": 0.000541876011993736,
      "learning_rate": 8.739893152775958e-06,
      "loss": 46.0,
      "step": 1263
    },
    {
      "epoch": 0.21330633253174702,
      "grad_norm": 0.00041087419958785176,
      "learning_rate": 8.651342319347867e-06,
      "loss": 46.0,
      "step": 1264
    },
    {
      "epoch": 0.2134750875416614,
      "grad_norm": 0.0012815961381420493,
      "learning_rate": 8.563222076656429e-06,
      "loss": 46.0,
      "step": 1265
    },
    {
      "epoch": 0.21364384255157576,
      "grad_norm": 0.000593251024838537,
      "learning_rate": 8.475532840074585e-06,
      "loss": 46.0,
      "step": 1266
    },
    {
      "epoch": 0.2138125975614901,
      "grad_norm": 0.00048146312474273145,
      "learning_rate": 8.388275022943648e-06,
      "loss": 46.0,
      "step": 1267
    },
    {
      "epoch": 0.21398135257140447,
      "grad_norm": 0.0008700843318365514,
      "learning_rate": 8.301449036571319e-06,
      "loss": 46.0,
      "step": 1268
    },
    {
      "epoch": 0.21415010758131883,
      "grad_norm": 0.0005789704155176878,
      "learning_rate": 8.215055290229779e-06,
      "loss": 46.0,
      "step": 1269
    },
    {
      "epoch": 0.21431886259123317,
      "grad_norm": 0.0015106883365660906,
      "learning_rate": 8.129094191153808e-06,
      "loss": 46.0,
      "step": 1270
    },
    {
      "epoch": 0.21448761760114754,
      "grad_norm": 0.0004934304743073881,
      "learning_rate": 8.04356614453874e-06,
      "loss": 46.0,
      "step": 1271
    },
    {
      "epoch": 0.21465637261106188,
      "grad_norm": 0.0019693290814757347,
      "learning_rate": 7.95847155353865e-06,
      "loss": 46.0,
      "step": 1272
    },
    {
      "epoch": 0.21482512762097625,
      "grad_norm": 0.002005916088819504,
      "learning_rate": 7.873810819264483e-06,
      "loss": 46.0,
      "step": 1273
    },
    {
      "epoch": 0.21499388263089061,
      "grad_norm": 0.0012562709162011743,
      "learning_rate": 7.789584340782075e-06,
      "loss": 46.0,
      "step": 1274
    },
    {
      "epoch": 0.21516263764080495,
      "grad_norm": 0.0009011728689074516,
      "learning_rate": 7.705792515110232e-06,
      "loss": 46.0,
      "step": 1275
    },
    {
      "epoch": 0.21533139265071932,
      "grad_norm": 0.0014264247147366405,
      "learning_rate": 7.622435737219069e-06,
      "loss": 46.0,
      "step": 1276
    },
    {
      "epoch": 0.21550014766063366,
      "grad_norm": 0.0006953159463591874,
      "learning_rate": 7.539514400027925e-06,
      "loss": 46.0,
      "step": 1277
    },
    {
      "epoch": 0.21566890267054803,
      "grad_norm": 0.001039333757944405,
      "learning_rate": 7.457028894403628e-06,
      "loss": 46.0,
      "step": 1278
    },
    {
      "epoch": 0.2158376576804624,
      "grad_norm": 0.000679376011248678,
      "learning_rate": 7.374979609158617e-06,
      "loss": 46.0,
      "step": 1279
    },
    {
      "epoch": 0.21600641269037674,
      "grad_norm": 0.000941453967243433,
      "learning_rate": 7.293366931049128e-06,
      "loss": 46.0,
      "step": 1280
    },
    {
      "epoch": 0.2161751677002911,
      "grad_norm": 0.0007799931336194277,
      "learning_rate": 7.212191244773336e-06,
      "loss": 46.0,
      "step": 1281
    },
    {
      "epoch": 0.21634392271020547,
      "grad_norm": 0.0007033965666778386,
      "learning_rate": 7.131452932969595e-06,
      "loss": 46.0,
      "step": 1282
    },
    {
      "epoch": 0.2165126777201198,
      "grad_norm": 0.001040134928189218,
      "learning_rate": 7.051152376214565e-06,
      "loss": 46.0,
      "step": 1283
    },
    {
      "epoch": 0.21668143273003418,
      "grad_norm": 0.0005914743524044752,
      "learning_rate": 6.9712899530215095e-06,
      "loss": 46.0,
      "step": 1284
    },
    {
      "epoch": 0.21685018773994852,
      "grad_norm": 0.0005753316800110042,
      "learning_rate": 6.891866039838391e-06,
      "loss": 46.0,
      "step": 1285
    },
    {
      "epoch": 0.2170189427498629,
      "grad_norm": 0.00033438560785725713,
      "learning_rate": 6.812881011046246e-06,
      "loss": 46.0,
      "step": 1286
    },
    {
      "epoch": 0.21718769775977725,
      "grad_norm": 0.000527498428709805,
      "learning_rate": 6.734335238957301e-06,
      "loss": 46.0,
      "step": 1287
    },
    {
      "epoch": 0.2173564527696916,
      "grad_norm": 0.0015326968859881163,
      "learning_rate": 6.656229093813182e-06,
      "loss": 46.0,
      "step": 1288
    },
    {
      "epoch": 0.21752520777960596,
      "grad_norm": 0.0006215130561031401,
      "learning_rate": 6.578562943783384e-06,
      "loss": 46.0,
      "step": 1289
    },
    {
      "epoch": 0.2176939627895203,
      "grad_norm": 0.0009670006693340838,
      "learning_rate": 6.501337154963305e-06,
      "loss": 46.0,
      "step": 1290
    },
    {
      "epoch": 0.21786271779943467,
      "grad_norm": 0.0005261996411718428,
      "learning_rate": 6.424552091372604e-06,
      "loss": 46.0,
      "step": 1291
    },
    {
      "epoch": 0.21803147280934904,
      "grad_norm": 0.000595112273003906,
      "learning_rate": 6.3482081149535355e-06,
      "loss": 46.0,
      "step": 1292
    },
    {
      "epoch": 0.21820022781926338,
      "grad_norm": 0.000555274193175137,
      "learning_rate": 6.272305585569172e-06,
      "loss": 46.0,
      "step": 1293
    },
    {
      "epoch": 0.21836898282917774,
      "grad_norm": 0.0012713369214907289,
      "learning_rate": 6.196844861001727e-06,
      "loss": 46.0,
      "step": 1294
    },
    {
      "epoch": 0.2185377378390921,
      "grad_norm": 0.0006671750452369452,
      "learning_rate": 6.1218262969509055e-06,
      "loss": 46.0,
      "step": 1295
    },
    {
      "epoch": 0.21870649284900645,
      "grad_norm": 0.001461333711631596,
      "learning_rate": 6.047250247032165e-06,
      "loss": 46.0,
      "step": 1296
    },
    {
      "epoch": 0.21887524785892082,
      "grad_norm": 0.000863353256136179,
      "learning_rate": 5.973117062775113e-06,
      "loss": 46.0,
      "step": 1297
    },
    {
      "epoch": 0.21904400286883516,
      "grad_norm": 0.0012815648224204779,
      "learning_rate": 5.899427093621768e-06,
      "loss": 46.0,
      "step": 1298
    },
    {
      "epoch": 0.21921275787874953,
      "grad_norm": 0.0009904132457450032,
      "learning_rate": 5.826180686925031e-06,
      "loss": 46.0,
      "step": 1299
    },
    {
      "epoch": 0.2193815128886639,
      "grad_norm": 0.0006559500470757484,
      "learning_rate": 5.753378187946967e-06,
      "loss": 46.0,
      "step": 1300
    },
    {
      "epoch": 0.21955026789857823,
      "grad_norm": 0.001320411916822195,
      "learning_rate": 5.6810199398570905e-06,
      "loss": 46.0,
      "step": 1301
    },
    {
      "epoch": 0.2197190229084926,
      "grad_norm": 0.00033066104515455663,
      "learning_rate": 5.609106283730991e-06,
      "loss": 46.0,
      "step": 1302
    },
    {
      "epoch": 0.21988777791840694,
      "grad_norm": 0.0012177408207207918,
      "learning_rate": 5.537637558548525e-06,
      "loss": 46.0,
      "step": 1303
    },
    {
      "epoch": 0.2200565329283213,
      "grad_norm": 0.00042670281254686415,
      "learning_rate": 5.4666141011922025e-06,
      "loss": 46.0,
      "step": 1304
    },
    {
      "epoch": 0.22022528793823568,
      "grad_norm": 0.0011261850595474243,
      "learning_rate": 5.39603624644579e-06,
      "loss": 46.0,
      "step": 1305
    },
    {
      "epoch": 0.22039404294815002,
      "grad_norm": 0.0009061134187504649,
      "learning_rate": 5.325904326992548e-06,
      "loss": 46.0,
      "step": 1306
    },
    {
      "epoch": 0.22056279795806438,
      "grad_norm": 0.0008035211358219385,
      "learning_rate": 5.2562186734137485e-06,
      "loss": 46.0,
      "step": 1307
    },
    {
      "epoch": 0.22073155296797875,
      "grad_norm": 0.0004889973206445575,
      "learning_rate": 5.186979614187071e-06,
      "loss": 46.0,
      "step": 1308
    },
    {
      "epoch": 0.2209003079778931,
      "grad_norm": 0.0006611088174395263,
      "learning_rate": 5.118187475685121e-06,
      "loss": 46.0,
      "step": 1309
    },
    {
      "epoch": 0.22106906298780746,
      "grad_norm": 0.0021591607946902514,
      "learning_rate": 5.049842582173814e-06,
      "loss": 46.0,
      "step": 1310
    },
    {
      "epoch": 0.2212378179977218,
      "grad_norm": 0.0015975041314959526,
      "learning_rate": 4.9819452558109045e-06,
      "loss": 46.0,
      "step": 1311
    },
    {
      "epoch": 0.22140657300763616,
      "grad_norm": 0.00249864743091166,
      "learning_rate": 4.914495816644449e-06,
      "loss": 46.0,
      "step": 1312
    },
    {
      "epoch": 0.22157532801755053,
      "grad_norm": 0.0006162183126434684,
      "learning_rate": 4.847494582611301e-06,
      "loss": 46.0,
      "step": 1313
    },
    {
      "epoch": 0.22174408302746487,
      "grad_norm": 0.0007919514318928123,
      "learning_rate": 4.78094186953556e-06,
      "loss": 46.0,
      "step": 1314
    },
    {
      "epoch": 0.22191283803737924,
      "grad_norm": 0.0022155398037284613,
      "learning_rate": 4.714837991127186e-06,
      "loss": 46.0,
      "step": 1315
    },
    {
      "epoch": 0.22208159304729358,
      "grad_norm": 0.0005009054439142346,
      "learning_rate": 4.6491832589804605e-06,
      "loss": 46.0,
      "step": 1316
    },
    {
      "epoch": 0.22225034805720795,
      "grad_norm": 0.0015528866788372397,
      "learning_rate": 4.583977982572463e-06,
      "loss": 46.0,
      "step": 1317
    },
    {
      "epoch": 0.22241910306712231,
      "grad_norm": 0.0009101424948312342,
      "learning_rate": 4.519222469261731e-06,
      "loss": 46.0,
      "step": 1318
    },
    {
      "epoch": 0.22258785807703665,
      "grad_norm": 0.000738188624382019,
      "learning_rate": 4.454917024286742e-06,
      "loss": 46.0,
      "step": 1319
    },
    {
      "epoch": 0.22275661308695102,
      "grad_norm": 0.0008253224659711123,
      "learning_rate": 4.391061950764453e-06,
      "loss": 46.0,
      "step": 1320
    },
    {
      "epoch": 0.2229253680968654,
      "grad_norm": 0.001060123322531581,
      "learning_rate": 4.327657549688935e-06,
      "loss": 46.0,
      "step": 1321
    },
    {
      "epoch": 0.22309412310677973,
      "grad_norm": 0.0007293847156688571,
      "learning_rate": 4.264704119929897e-06,
      "loss": 46.0,
      "step": 1322
    },
    {
      "epoch": 0.2232628781166941,
      "grad_norm": 0.00065107416594401,
      "learning_rate": 4.202201958231322e-06,
      "loss": 46.0,
      "step": 1323
    },
    {
      "epoch": 0.22343163312660844,
      "grad_norm": 0.0006281050737015903,
      "learning_rate": 4.140151359210043e-06,
      "loss": 46.0,
      "step": 1324
    },
    {
      "epoch": 0.2236003881365228,
      "grad_norm": 0.000606034358497709,
      "learning_rate": 4.078552615354325e-06,
      "loss": 46.0,
      "step": 1325
    },
    {
      "epoch": 0.22376914314643717,
      "grad_norm": 0.0006844737217761576,
      "learning_rate": 4.017406017022607e-06,
      "loss": 46.0,
      "step": 1326
    },
    {
      "epoch": 0.2239378981563515,
      "grad_norm": 0.000884491775650531,
      "learning_rate": 3.95671185244193e-06,
      "loss": 46.0,
      "step": 1327
    },
    {
      "epoch": 0.22410665316626588,
      "grad_norm": 0.0005510865012183785,
      "learning_rate": 3.896470407706798e-06,
      "loss": 46.0,
      "step": 1328
    },
    {
      "epoch": 0.22427540817618022,
      "grad_norm": 0.0006715485942550004,
      "learning_rate": 3.836681966777678e-06,
      "loss": 46.0,
      "step": 1329
    },
    {
      "epoch": 0.2244441631860946,
      "grad_norm": 0.001269484986551106,
      "learning_rate": 3.7773468114796764e-06,
      "loss": 46.0,
      "step": 1330
    },
    {
      "epoch": 0.22461291819600895,
      "grad_norm": 0.0022162743844091892,
      "learning_rate": 3.718465221501344e-06,
      "loss": 46.0,
      "step": 1331
    },
    {
      "epoch": 0.2247816732059233,
      "grad_norm": 0.00027326264535076916,
      "learning_rate": 3.660037474393152e-06,
      "loss": 46.0,
      "step": 1332
    },
    {
      "epoch": 0.22495042821583766,
      "grad_norm": 0.0005874920170754194,
      "learning_rate": 3.602063845566361e-06,
      "loss": 46.0,
      "step": 1333
    },
    {
      "epoch": 0.225119183225752,
      "grad_norm": 0.0009573047864250839,
      "learning_rate": 3.544544608291622e-06,
      "loss": 46.0,
      "step": 1334
    },
    {
      "epoch": 0.22528793823566637,
      "grad_norm": 0.0007656016387045383,
      "learning_rate": 3.487480033697721e-06,
      "loss": 46.0,
      "step": 1335
    },
    {
      "epoch": 0.22545669324558074,
      "grad_norm": 0.00044490606524050236,
      "learning_rate": 3.430870390770291e-06,
      "loss": 46.0,
      "step": 1336
    },
    {
      "epoch": 0.22562544825549508,
      "grad_norm": 0.0003960455651395023,
      "learning_rate": 3.3747159463505595e-06,
      "loss": 46.0,
      "step": 1337
    },
    {
      "epoch": 0.22579420326540944,
      "grad_norm": 0.000521904497873038,
      "learning_rate": 3.3190169651340806e-06,
      "loss": 46.0,
      "step": 1338
    },
    {
      "epoch": 0.2259629582753238,
      "grad_norm": 0.0004184047575108707,
      "learning_rate": 3.2637737096695154e-06,
      "loss": 46.0,
      "step": 1339
    },
    {
      "epoch": 0.22613171328523815,
      "grad_norm": 0.001061237882822752,
      "learning_rate": 3.2089864403572976e-06,
      "loss": 46.0,
      "step": 1340
    },
    {
      "epoch": 0.22630046829515252,
      "grad_norm": 0.000829793163575232,
      "learning_rate": 3.1546554154485486e-06,
      "loss": 46.0,
      "step": 1341
    },
    {
      "epoch": 0.22646922330506686,
      "grad_norm": 0.0010989286238327622,
      "learning_rate": 3.100780891043764e-06,
      "loss": 46.0,
      "step": 1342
    },
    {
      "epoch": 0.22663797831498123,
      "grad_norm": 0.000658164091873914,
      "learning_rate": 3.0473631210915954e-06,
      "loss": 46.0,
      "step": 1343
    },
    {
      "epoch": 0.2268067333248956,
      "grad_norm": 0.0018878680421039462,
      "learning_rate": 2.994402357387738e-06,
      "loss": 46.0,
      "step": 1344
    },
    {
      "epoch": 0.22697548833480993,
      "grad_norm": 0.0011438899673521519,
      "learning_rate": 2.9418988495736765e-06,
      "loss": 46.0,
      "step": 1345
    },
    {
      "epoch": 0.2271442433447243,
      "grad_norm": 0.0005721378256566823,
      "learning_rate": 2.889852845135521e-06,
      "loss": 46.0,
      "step": 1346
    },
    {
      "epoch": 0.22731299835463864,
      "grad_norm": 0.0004976931377314031,
      "learning_rate": 2.8382645894028393e-06,
      "loss": 46.0,
      "step": 1347
    },
    {
      "epoch": 0.227481753364553,
      "grad_norm": 0.00046737625962123275,
      "learning_rate": 2.7871343255475135e-06,
      "loss": 46.0,
      "step": 1348
    },
    {
      "epoch": 0.22765050837446738,
      "grad_norm": 0.0006955791031941772,
      "learning_rate": 2.736462294582598e-06,
      "loss": 46.0,
      "step": 1349
    },
    {
      "epoch": 0.22781926338438171,
      "grad_norm": 0.0017836546758189797,
      "learning_rate": 2.686248735361141e-06,
      "loss": 46.0,
      "step": 1350
    },
    {
      "epoch": 0.22798801839429608,
      "grad_norm": 0.000547305797226727,
      "learning_rate": 2.6364938845751196e-06,
      "loss": 46.0,
      "step": 1351
    },
    {
      "epoch": 0.22815677340421045,
      "grad_norm": 0.0018898543203249574,
      "learning_rate": 2.5871979767543077e-06,
      "loss": 46.0,
      "step": 1352
    },
    {
      "epoch": 0.2283255284141248,
      "grad_norm": 0.0005670526879839599,
      "learning_rate": 2.5383612442650973e-06,
      "loss": 46.0,
      "step": 1353
    },
    {
      "epoch": 0.22849428342403916,
      "grad_norm": 0.0006443196325562894,
      "learning_rate": 2.489983917309502e-06,
      "loss": 46.0,
      "step": 1354
    },
    {
      "epoch": 0.2286630384339535,
      "grad_norm": 0.000439947412814945,
      "learning_rate": 2.4420662239240666e-06,
      "loss": 46.0,
      "step": 1355
    },
    {
      "epoch": 0.22883179344386786,
      "grad_norm": 0.0012257567141205072,
      "learning_rate": 2.3946083899786697e-06,
      "loss": 46.0,
      "step": 1356
    },
    {
      "epoch": 0.22900054845378223,
      "grad_norm": 0.0011901329271495342,
      "learning_rate": 2.3476106391756347e-06,
      "loss": 46.0,
      "step": 1357
    },
    {
      "epoch": 0.22916930346369657,
      "grad_norm": 0.0009685796103440225,
      "learning_rate": 2.3010731930485417e-06,
      "loss": 46.0,
      "step": 1358
    },
    {
      "epoch": 0.22933805847361094,
      "grad_norm": 0.0011787050170823932,
      "learning_rate": 2.2549962709612293e-06,
      "loss": 46.0,
      "step": 1359
    },
    {
      "epoch": 0.22950681348352528,
      "grad_norm": 0.0006927854265086353,
      "learning_rate": 2.209380090106794e-06,
      "loss": 46.0,
      "step": 1360
    },
    {
      "epoch": 0.22967556849343965,
      "grad_norm": 0.0021828326862305403,
      "learning_rate": 2.164224865506492e-06,
      "loss": 46.0,
      "step": 1361
    },
    {
      "epoch": 0.22984432350335401,
      "grad_norm": 0.0011952252825722098,
      "learning_rate": 2.1195308100087964e-06,
      "loss": 46.0,
      "step": 1362
    },
    {
      "epoch": 0.23001307851326835,
      "grad_norm": 0.0008620006265118718,
      "learning_rate": 2.0752981342883504e-06,
      "loss": 46.0,
      "step": 1363
    },
    {
      "epoch": 0.23018183352318272,
      "grad_norm": 0.0007816816796548665,
      "learning_rate": 2.031527046844994e-06,
      "loss": 46.0,
      "step": 1364
    },
    {
      "epoch": 0.2303505885330971,
      "grad_norm": 0.00032747103250585496,
      "learning_rate": 1.9882177540027856e-06,
      "loss": 46.0,
      "step": 1365
    },
    {
      "epoch": 0.23051934354301143,
      "grad_norm": 0.0009276359342038631,
      "learning_rate": 1.9453704599089906e-06,
      "loss": 46.0,
      "step": 1366
    },
    {
      "epoch": 0.2306880985529258,
      "grad_norm": 0.0007578957010991871,
      "learning_rate": 1.902985366533172e-06,
      "loss": 46.0,
      "step": 1367
    },
    {
      "epoch": 0.23085685356284014,
      "grad_norm": 0.0009802145650610328,
      "learning_rate": 1.8610626736662252e-06,
      "loss": 46.0,
      "step": 1368
    },
    {
      "epoch": 0.2310256085727545,
      "grad_norm": 0.0017398048657923937,
      "learning_rate": 1.8196025789193771e-06,
      "loss": 46.0,
      "step": 1369
    },
    {
      "epoch": 0.23119436358266887,
      "grad_norm": 0.001226657535880804,
      "learning_rate": 1.7786052777233663e-06,
      "loss": 46.0,
      "step": 1370
    },
    {
      "epoch": 0.2313631185925832,
      "grad_norm": 0.0007180358516052365,
      "learning_rate": 1.7380709633274095e-06,
      "loss": 46.0,
      "step": 1371
    },
    {
      "epoch": 0.23153187360249758,
      "grad_norm": 0.002623442793264985,
      "learning_rate": 1.6979998267983577e-06,
      "loss": 46.0,
      "step": 1372
    },
    {
      "epoch": 0.23170062861241192,
      "grad_norm": 0.0011678035371005535,
      "learning_rate": 1.6583920570197752e-06,
      "loss": 46.0,
      "step": 1373
    },
    {
      "epoch": 0.23186938362232629,
      "grad_norm": 0.0005888472078368068,
      "learning_rate": 1.6192478406910626e-06,
      "loss": 46.0,
      "step": 1374
    },
    {
      "epoch": 0.23203813863224065,
      "grad_norm": 0.001251881243661046,
      "learning_rate": 1.5805673623265572e-06,
      "loss": 46.0,
      "step": 1375
    },
    {
      "epoch": 0.232206893642155,
      "grad_norm": 0.0005478327511809766,
      "learning_rate": 1.5423508042546553e-06,
      "loss": 46.0,
      "step": 1376
    },
    {
      "epoch": 0.23237564865206936,
      "grad_norm": 0.0005645382916554809,
      "learning_rate": 1.504598346616981e-06,
      "loss": 46.0,
      "step": 1377
    },
    {
      "epoch": 0.23254440366198373,
      "grad_norm": 0.0011800862848758698,
      "learning_rate": 1.4673101673675639e-06,
      "loss": 46.0,
      "step": 1378
    },
    {
      "epoch": 0.23271315867189807,
      "grad_norm": 0.0021919957362115383,
      "learning_rate": 1.4304864422718945e-06,
      "loss": 46.0,
      "step": 1379
    },
    {
      "epoch": 0.23288191368181244,
      "grad_norm": 0.000592592463362962,
      "learning_rate": 1.3941273449062041e-06,
      "loss": 46.0,
      "step": 1380
    },
    {
      "epoch": 0.23305066869172678,
      "grad_norm": 0.002734198933467269,
      "learning_rate": 1.3582330466565985e-06,
      "loss": 46.0,
      "step": 1381
    },
    {
      "epoch": 0.23321942370164114,
      "grad_norm": 0.0006839183042757213,
      "learning_rate": 1.3228037167182573e-06,
      "loss": 46.0,
      "step": 1382
    },
    {
      "epoch": 0.2333881787115555,
      "grad_norm": 0.0003775713557843119,
      "learning_rate": 1.2878395220946248e-06,
      "loss": 46.0,
      "step": 1383
    },
    {
      "epoch": 0.23355693372146985,
      "grad_norm": 0.00045846737339161336,
      "learning_rate": 1.253340627596644e-06,
      "loss": 46.0,
      "step": 1384
    },
    {
      "epoch": 0.23372568873138422,
      "grad_norm": 0.0007842537015676498,
      "learning_rate": 1.2193071958419788e-06,
      "loss": 46.0,
      "step": 1385
    },
    {
      "epoch": 0.23389444374129856,
      "grad_norm": 0.0012929689837619662,
      "learning_rate": 1.1857393872542255e-06,
      "loss": 46.0,
      "step": 1386
    },
    {
      "epoch": 0.23406319875121293,
      "grad_norm": 0.0015821013366803527,
      "learning_rate": 1.1526373600621699e-06,
      "loss": 46.0,
      "step": 1387
    },
    {
      "epoch": 0.2342319537611273,
      "grad_norm": 0.0006062084576115012,
      "learning_rate": 1.1200012702990425e-06,
      "loss": 46.0,
      "step": 1388
    },
    {
      "epoch": 0.23440070877104163,
      "grad_norm": 0.0006992828566581011,
      "learning_rate": 1.0878312718017868e-06,
      "loss": 46.0,
      "step": 1389
    },
    {
      "epoch": 0.234569463780956,
      "grad_norm": 0.001030724961310625,
      "learning_rate": 1.0561275162103035e-06,
      "loss": 46.0,
      "step": 1390
    },
    {
      "epoch": 0.23473821879087037,
      "grad_norm": 0.001839980366639793,
      "learning_rate": 1.0248901529668286e-06,
      "loss": 46.0,
      "step": 1391
    },
    {
      "epoch": 0.2349069738007847,
      "grad_norm": 0.0017480964306741953,
      "learning_rate": 9.941193293150796e-07,
      "loss": 46.0,
      "step": 1392
    },
    {
      "epoch": 0.23507572881069907,
      "grad_norm": 0.0008748589316383004,
      "learning_rate": 9.638151902996773e-07,
      "loss": 46.0,
      "step": 1393
    },
    {
      "epoch": 0.23524448382061341,
      "grad_norm": 0.0008244823548011482,
      "learning_rate": 9.339778787654574e-07,
      "loss": 46.0,
      "step": 1394
    },
    {
      "epoch": 0.23541323883052778,
      "grad_norm": 0.001938622328452766,
      "learning_rate": 9.046075353567163e-07,
      "loss": 46.0,
      "step": 1395
    },
    {
      "epoch": 0.23558199384044215,
      "grad_norm": 0.0008106924360617995,
      "learning_rate": 8.757042985166331e-07,
      "loss": 46.0,
      "step": 1396
    },
    {
      "epoch": 0.2357507488503565,
      "grad_norm": 0.0010104329558089375,
      "learning_rate": 8.472683044865815e-07,
      "loss": 46.0,
      "step": 1397
    },
    {
      "epoch": 0.23591950386027086,
      "grad_norm": 0.0008030526805669069,
      "learning_rate": 8.192996873054748e-07,
      "loss": 46.0,
      "step": 1398
    },
    {
      "epoch": 0.2360882588701852,
      "grad_norm": 0.003335982095450163,
      "learning_rate": 7.917985788091553e-07,
      "loss": 46.0,
      "step": 1399
    },
    {
      "epoch": 0.23625701388009956,
      "grad_norm": 0.0006814859807491302,
      "learning_rate": 7.647651086297835e-07,
      "loss": 46.0,
      "step": 1400
    },
    {
      "epoch": 0.23642576889001393,
      "grad_norm": 0.001208942150697112,
      "learning_rate": 7.381994041951945e-07,
      "loss": 46.0,
      "step": 1401
    },
    {
      "epoch": 0.23659452389992827,
      "grad_norm": 0.0026390880811959505,
      "learning_rate": 7.12101590728298e-07,
      "loss": 46.0,
      "step": 1402
    },
    {
      "epoch": 0.23676327890984264,
      "grad_norm": 0.0010781860910356045,
      "learning_rate": 6.864717912465346e-07,
      "loss": 46.0,
      "step": 1403
    },
    {
      "epoch": 0.23693203391975698,
      "grad_norm": 0.001180395483970642,
      "learning_rate": 6.613101265612431e-07,
      "loss": 46.0,
      "step": 1404
    },
    {
      "epoch": 0.23710078892967135,
      "grad_norm": 0.0006595318554900587,
      "learning_rate": 6.366167152771274e-07,
      "loss": 46.0,
      "step": 1405
    },
    {
      "epoch": 0.23726954393958571,
      "grad_norm": 0.0005041745025664568,
      "learning_rate": 6.123916737916568e-07,
      "loss": 46.0,
      "step": 1406
    },
    {
      "epoch": 0.23743829894950005,
      "grad_norm": 0.0013777145650237799,
      "learning_rate": 5.886351162945891e-07,
      "loss": 46.0,
      "step": 1407
    },
    {
      "epoch": 0.23760705395941442,
      "grad_norm": 0.0009335106587968767,
      "learning_rate": 5.653471547673262e-07,
      "loss": 46.0,
      "step": 1408
    },
    {
      "epoch": 0.2377758089693288,
      "grad_norm": 0.0007560536614619195,
      "learning_rate": 5.425278989824922e-07,
      "loss": 46.0,
      "step": 1409
    },
    {
      "epoch": 0.23794456397924313,
      "grad_norm": 0.0007272138609550893,
      "learning_rate": 5.201774565033679e-07,
      "loss": 46.0,
      "step": 1410
    },
    {
      "epoch": 0.2381133189891575,
      "grad_norm": 0.0035329603124409914,
      "learning_rate": 4.982959326833347e-07,
      "loss": 46.0,
      "step": 1411
    },
    {
      "epoch": 0.23828207399907184,
      "grad_norm": 0.0006473190151154995,
      "learning_rate": 4.76883430665509e-07,
      "loss": 46.0,
      "step": 1412
    },
    {
      "epoch": 0.2384508290089862,
      "grad_norm": 0.0016907091485336423,
      "learning_rate": 4.5594005138211993e-07,
      "loss": 46.0,
      "step": 1413
    },
    {
      "epoch": 0.23861958401890057,
      "grad_norm": 0.0016014818102121353,
      "learning_rate": 4.3546589355409894e-07,
      "loss": 46.0,
      "step": 1414
    },
    {
      "epoch": 0.2387883390288149,
      "grad_norm": 0.0006536655710078776,
      "learning_rate": 4.154610536906134e-07,
      "loss": 46.0,
      "step": 1415
    },
    {
      "epoch": 0.23895709403872928,
      "grad_norm": 0.0007482526707462966,
      "learning_rate": 3.959256260886113e-07,
      "loss": 46.0,
      "step": 1416
    },
    {
      "epoch": 0.23912584904864362,
      "grad_norm": 0.0012404083972796798,
      "learning_rate": 3.7685970283234395e-07,
      "loss": 46.0,
      "step": 1417
    },
    {
      "epoch": 0.23929460405855799,
      "grad_norm": 0.0006575691513717175,
      "learning_rate": 3.5826337379297746e-07,
      "loss": 46.0,
      "step": 1418
    },
    {
      "epoch": 0.23946335906847235,
      "grad_norm": 0.0005846316926181316,
      "learning_rate": 3.4013672662815964e-07,
      "loss": 46.0,
      "step": 1419
    },
    {
      "epoch": 0.2396321140783867,
      "grad_norm": 0.0015353142516687512,
      "learning_rate": 3.2247984678155374e-07,
      "loss": 46.0,
      "step": 1420
    },
    {
      "epoch": 0.23980086908830106,
      "grad_norm": 0.0007021796191111207,
      "learning_rate": 3.0529281748252757e-07,
      "loss": 46.0,
      "step": 1421
    },
    {
      "epoch": 0.23996962409821543,
      "grad_norm": 0.00043599194032140076,
      "learning_rate": 2.8857571974567623e-07,
      "loss": 46.0,
      "step": 1422
    },
    {
      "epoch": 0.24013837910812977,
      "grad_norm": 0.0008538305992260575,
      "learning_rate": 2.723286323704888e-07,
      "loss": 46.0,
      "step": 1423
    },
    {
      "epoch": 0.24030713411804414,
      "grad_norm": 0.0007377176661975682,
      "learning_rate": 2.565516319409711e-07,
      "loss": 46.0,
      "step": 1424
    },
    {
      "epoch": 0.24047588912795848,
      "grad_norm": 0.001053746440447867,
      "learning_rate": 2.412447928252459e-07,
      "loss": 46.0,
      "step": 1425
    },
    {
      "epoch": 0.24064464413787284,
      "grad_norm": 0.00034214206971228123,
      "learning_rate": 2.2640818717527546e-07,
      "loss": 46.0,
      "step": 1426
    },
    {
      "epoch": 0.2408133991477872,
      "grad_norm": 0.0006055811536498368,
      "learning_rate": 2.120418849264616e-07,
      "loss": 46.0,
      "step": 1427
    },
    {
      "epoch": 0.24098215415770155,
      "grad_norm": 0.0032110221218317747,
      "learning_rate": 1.9814595379732405e-07,
      "loss": 46.0,
      "step": 1428
    },
    {
      "epoch": 0.24115090916761592,
      "grad_norm": 0.0006545573123730719,
      "learning_rate": 1.8472045928920045e-07,
      "loss": 46.0,
      "step": 1429
    },
    {
      "epoch": 0.24131966417753026,
      "grad_norm": 0.0010023179929703474,
      "learning_rate": 1.7176546468592457e-07,
      "loss": 46.0,
      "step": 1430
    },
    {
      "epoch": 0.24148841918744463,
      "grad_norm": 0.0009001618018373847,
      "learning_rate": 1.5928103105354864e-07,
      "loss": 46.0,
      "step": 1431
    },
    {
      "epoch": 0.241657174197359,
      "grad_norm": 0.000825360300950706,
      "learning_rate": 1.4726721724001024e-07,
      "loss": 46.0,
      "step": 1432
    },
    {
      "epoch": 0.24182592920727333,
      "grad_norm": 0.0018932815873995423,
      "learning_rate": 1.3572407987491042e-07,
      "loss": 46.0,
      "step": 1433
    },
    {
      "epoch": 0.2419946842171877,
      "grad_norm": 0.0010684579610824585,
      "learning_rate": 1.2465167336920268e-07,
      "loss": 46.0,
      "step": 1434
    },
    {
      "epoch": 0.24216343922710207,
      "grad_norm": 0.0006767999730072916,
      "learning_rate": 1.1405004991495993e-07,
      "loss": 46.0,
      "step": 1435
    },
    {
      "epoch": 0.2423321942370164,
      "grad_norm": 0.0002995872055180371,
      "learning_rate": 1.0391925948511904e-07,
      "loss": 46.0,
      "step": 1436
    },
    {
      "epoch": 0.24250094924693077,
      "grad_norm": 0.0010398075683042407,
      "learning_rate": 9.425934983323669e-08,
      "loss": 46.0,
      "step": 1437
    },
    {
      "epoch": 0.24266970425684511,
      "grad_norm": 0.0006404778105206788,
      "learning_rate": 8.507036649331168e-08,
      "loss": 46.0,
      "step": 1438
    },
    {
      "epoch": 0.24283845926675948,
      "grad_norm": 0.0009962361073121428,
      "learning_rate": 7.635235277950736e-08,
      "loss": 46.0,
      "step": 1439
    },
    {
      "epoch": 0.24300721427667385,
      "grad_norm": 0.0007267189212143421,
      "learning_rate": 6.810534978597404e-08,
      "loss": 46.0,
      "step": 1440
    },
    {
      "epoch": 0.2431759692865882,
      "grad_norm": 0.000724569137673825,
      "learning_rate": 6.032939638664914e-08,
      "loss": 46.0,
      "step": 1441
    },
    {
      "epoch": 0.24334472429650256,
      "grad_norm": 0.0003782059939112514,
      "learning_rate": 5.3024529235112807e-08,
      "loss": 46.0,
      "step": 1442
    },
    {
      "epoch": 0.2435134793064169,
      "grad_norm": 0.001214990857988596,
      "learning_rate": 4.619078276436595e-08,
      "loss": 46.0,
      "step": 1443
    },
    {
      "epoch": 0.24368223431633126,
      "grad_norm": 0.0013174681225791574,
      "learning_rate": 3.982818918665254e-08,
      "loss": 46.0,
      "step": 1444
    },
    {
      "epoch": 0.24385098932624563,
      "grad_norm": 0.0019085375824943185,
      "learning_rate": 3.393677849340415e-08,
      "loss": 46.0,
      "step": 1445
    },
    {
      "epoch": 0.24401974433615997,
      "grad_norm": 0.0005340630887076259,
      "learning_rate": 2.8516578454973465e-08,
      "loss": 46.0,
      "step": 1446
    },
    {
      "epoch": 0.24418849934607434,
      "grad_norm": 0.0010641046101227403,
      "learning_rate": 2.3567614620600975e-08,
      "loss": 46.0,
      "step": 1447
    },
    {
      "epoch": 0.2443572543559887,
      "grad_norm": 0.0006958871381357312,
      "learning_rate": 1.9089910318259572e-08,
      "loss": 46.0,
      "step": 1448
    },
    {
      "epoch": 0.24452600936590305,
      "grad_norm": 0.0013515378814190626,
      "learning_rate": 1.5083486654510203e-08,
      "loss": 46.0,
      "step": 1449
    },
    {
      "epoch": 0.24469476437581741,
      "grad_norm": 0.0014520830009132624,
      "learning_rate": 1.1548362514479661e-08,
      "loss": 46.0,
      "step": 1450
    },
    {
      "epoch": 0.24486351938573175,
      "grad_norm": 0.0015810457989573479,
      "learning_rate": 8.484554561682956e-09,
      "loss": 46.0,
      "step": 1451
    },
    {
      "epoch": 0.24503227439564612,
      "grad_norm": 0.001024417346343398,
      "learning_rate": 5.8920772380233195e-09,
      "loss": 46.0,
      "step": 1452
    },
    {
      "epoch": 0.2452010294055605,
      "grad_norm": 0.000991427805274725,
      "learning_rate": 3.7709427636700714e-09,
      "loss": 46.0,
      "step": 1453
    },
    {
      "epoch": 0.24536978441547483,
      "grad_norm": 0.001307089813053608,
      "learning_rate": 2.1211611370475226e-09,
      "loss": 46.0,
      "step": 1454
    },
    {
      "epoch": 0.2455385394253892,
      "grad_norm": 0.0008045461145229638,
      "learning_rate": 9.427401347128495e-10,
      "loss": 46.0,
      "step": 1455
    },
    {
      "epoch": 0.24570729443530354,
      "grad_norm": 0.0017816101899370551,
      "learning_rate": 2.356853114116042e-10,
      "loss": 46.0,
      "step": 1456
    },
    {
      "epoch": 0.2458760494452179,
      "grad_norm": 0.000787016935646534,
      "learning_rate": 0.0,
      "loss": 46.0,
      "step": 1457
    }
  ],
  "logging_steps": 1,
  "max_steps": 1457,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 365,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 29027633922048.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}