{
  "best_metric": 1.6292288303375244,
  "best_model_checkpoint": "miner_id_24/checkpoint-200",
  "epoch": 0.9956382669380633,
  "eval_steps": 50,
  "global_step": 214,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.004652515266065717,
      "grad_norm": 3.1542959213256836,
      "learning_rate": 2e-05,
      "loss": 5.9362,
      "step": 1
    },
    {
      "epoch": 0.004652515266065717,
      "eval_loss": 5.9479522705078125,
      "eval_runtime": 20.8376,
      "eval_samples_per_second": 34.745,
      "eval_steps_per_second": 8.686,
      "step": 1
    },
    {
      "epoch": 0.009305030532131433,
      "grad_norm": 3.2570960521698,
      "learning_rate": 4e-05,
      "loss": 5.9508,
      "step": 2
    },
    {
      "epoch": 0.01395754579819715,
      "grad_norm": 2.7825522422790527,
      "learning_rate": 6e-05,
      "loss": 5.7132,
      "step": 3
    },
    {
      "epoch": 0.018610061064262867,
      "grad_norm": 3.4090566635131836,
      "learning_rate": 8e-05,
      "loss": 5.9836,
      "step": 4
    },
    {
      "epoch": 0.023262576330328584,
      "grad_norm": 3.439707040786743,
      "learning_rate": 0.0001,
      "loss": 5.7045,
      "step": 5
    },
    {
      "epoch": 0.0279150915963943,
      "grad_norm": 3.6563377380371094,
      "learning_rate": 0.00012,
      "loss": 5.8168,
      "step": 6
    },
    {
      "epoch": 0.03256760686246002,
      "grad_norm": 4.006112098693848,
      "learning_rate": 0.00014,
      "loss": 4.9581,
      "step": 7
    },
    {
      "epoch": 0.03722012212852573,
      "grad_norm": 4.275187015533447,
      "learning_rate": 0.00016,
      "loss": 4.7639,
      "step": 8
    },
    {
      "epoch": 0.041872637394591454,
      "grad_norm": 4.293889999389648,
      "learning_rate": 0.00018,
      "loss": 4.3398,
      "step": 9
    },
    {
      "epoch": 0.04652515266065717,
      "grad_norm": 4.2498321533203125,
      "learning_rate": 0.0002,
      "loss": 4.2132,
      "step": 10
    },
    {
      "epoch": 0.05117766792672288,
      "grad_norm": 3.1846446990966797,
      "learning_rate": 0.0001999881422898077,
      "loss": 4.0895,
      "step": 11
    },
    {
      "epoch": 0.0558301831927886,
      "grad_norm": 4.165881633758545,
      "learning_rate": 0.0001999525719713366,
      "loss": 3.5577,
      "step": 12
    },
    {
      "epoch": 0.06048269845885432,
      "grad_norm": 4.896974563598633,
      "learning_rate": 0.00019989329748023725,
      "loss": 3.7735,
      "step": 13
    },
    {
      "epoch": 0.06513521372492004,
      "grad_norm": 4.080206394195557,
      "learning_rate": 0.00019981033287370443,
      "loss": 3.4187,
      "step": 14
    },
    {
      "epoch": 0.06978772899098575,
      "grad_norm": 3.606748580932617,
      "learning_rate": 0.0001997036978271433,
      "loss": 3.1502,
      "step": 15
    },
    {
      "epoch": 0.07444024425705147,
      "grad_norm": 2.8589611053466797,
      "learning_rate": 0.00019957341762950344,
      "loss": 2.8054,
      "step": 16
    },
    {
      "epoch": 0.07909275952311719,
      "grad_norm": 2.743023633956909,
      "learning_rate": 0.00019941952317728147,
      "loss": 2.774,
      "step": 17
    },
    {
      "epoch": 0.08374527478918291,
      "grad_norm": 2.8571105003356934,
      "learning_rate": 0.0001992420509671936,
      "loss": 2.902,
      "step": 18
    },
    {
      "epoch": 0.08839779005524862,
      "grad_norm": 2.666266679763794,
      "learning_rate": 0.0001990410430875205,
      "loss": 2.8819,
      "step": 19
    },
    {
      "epoch": 0.09305030532131434,
      "grad_norm": 2.3131182193756104,
      "learning_rate": 0.00019881654720812594,
      "loss": 2.7994,
      "step": 20
    },
    {
      "epoch": 0.09770282058738006,
      "grad_norm": 2.048189640045166,
      "learning_rate": 0.00019856861656915143,
      "loss": 2.5718,
      "step": 21
    },
    {
      "epoch": 0.10235533585344576,
      "grad_norm": 1.92734694480896,
      "learning_rate": 0.0001982973099683902,
      "loss": 2.6741,
      "step": 22
    },
    {
      "epoch": 0.10700785111951149,
      "grad_norm": 1.5441073179244995,
      "learning_rate": 0.0001980026917473432,
      "loss": 2.4727,
      "step": 23
    },
    {
      "epoch": 0.1116603663855772,
      "grad_norm": 1.761895775794983,
      "learning_rate": 0.0001976848317759601,
      "loss": 2.3773,
      "step": 24
    },
    {
      "epoch": 0.11631288165164291,
      "grad_norm": 1.940789818763733,
      "learning_rate": 0.0001973438054360693,
      "loss": 2.4848,
      "step": 25
    },
    {
      "epoch": 0.12096539691770863,
      "grad_norm": 1.9712603092193604,
      "learning_rate": 0.00019697969360350098,
      "loss": 2.4821,
      "step": 26
    },
    {
      "epoch": 0.12561791218377436,
      "grad_norm": 1.70182466506958,
      "learning_rate": 0.00019659258262890683,
      "loss": 2.238,
      "step": 27
    },
    {
      "epoch": 0.13027042744984008,
      "grad_norm": 1.6575437784194946,
      "learning_rate": 0.00019618256431728194,
      "loss": 2.6379,
      "step": 28
    },
    {
      "epoch": 0.1349229427159058,
      "grad_norm": 1.502398133277893,
      "learning_rate": 0.00019574973590619243,
      "loss": 2.2527,
      "step": 29
    },
    {
      "epoch": 0.1395754579819715,
      "grad_norm": 1.6205648183822632,
      "learning_rate": 0.00019529420004271567,
      "loss": 2.2776,
      "step": 30
    },
    {
      "epoch": 0.1442279732480372,
      "grad_norm": 1.4457286596298218,
      "learning_rate": 0.0001948160647590966,
      "loss": 2.2438,
      "step": 31
    },
    {
      "epoch": 0.14888048851410293,
      "grad_norm": 1.5694420337677002,
      "learning_rate": 0.00019431544344712776,
      "loss": 2.1488,
      "step": 32
    },
    {
      "epoch": 0.15353300378016865,
      "grad_norm": 1.890031337738037,
      "learning_rate": 0.00019379245483125784,
      "loss": 2.0873,
      "step": 33
    },
    {
      "epoch": 0.15818551904623437,
      "grad_norm": 1.6246261596679688,
      "learning_rate": 0.00019324722294043558,
      "loss": 2.163,
      "step": 34
    },
    {
      "epoch": 0.1628380343123001,
      "grad_norm": 1.498121976852417,
      "learning_rate": 0.00019267987707869606,
      "loss": 2.0497,
      "step": 35
    },
    {
      "epoch": 0.16749054957836582,
      "grad_norm": 1.6934443712234497,
      "learning_rate": 0.0001920905517944954,
      "loss": 2.2254,
      "step": 36
    },
    {
      "epoch": 0.1721430648444315,
      "grad_norm": 2.14957594871521,
      "learning_rate": 0.0001914793868488021,
      "loss": 2.4021,
      "step": 37
    },
    {
      "epoch": 0.17679558011049723,
      "grad_norm": 1.7338439226150513,
      "learning_rate": 0.00019084652718195238,
      "loss": 2.5293,
      "step": 38
    },
    {
      "epoch": 0.18144809537656295,
      "grad_norm": 1.5174095630645752,
      "learning_rate": 0.00019019212287927663,
      "loss": 2.1819,
      "step": 39
    },
    {
      "epoch": 0.18610061064262867,
      "grad_norm": 1.547797679901123,
      "learning_rate": 0.00018951632913550626,
      "loss": 2.0053,
      "step": 40
    },
    {
      "epoch": 0.1907531259086944,
      "grad_norm": 1.5773917436599731,
      "learning_rate": 0.00018881930621796847,
      "loss": 2.273,
      "step": 41
    },
    {
      "epoch": 0.19540564117476011,
      "grad_norm": 1.5535215139389038,
      "learning_rate": 0.00018810121942857845,
      "loss": 2.0164,
      "step": 42
    },
    {
      "epoch": 0.20005815644082584,
      "grad_norm": 1.773815393447876,
      "learning_rate": 0.00018736223906463696,
      "loss": 2.2051,
      "step": 43
    },
    {
      "epoch": 0.20471067170689153,
      "grad_norm": 1.91315758228302,
      "learning_rate": 0.00018660254037844388,
      "loss": 2.0598,
      "step": 44
    },
    {
      "epoch": 0.20936318697295725,
      "grad_norm": 1.5708611011505127,
      "learning_rate": 0.00018582230353573627,
      "loss": 2.1581,
      "step": 45
    },
    {
      "epoch": 0.21401570223902297,
      "grad_norm": 1.4507153034210205,
      "learning_rate": 0.00018502171357296144,
      "loss": 2.0607,
      "step": 46
    },
    {
      "epoch": 0.2186682175050887,
      "grad_norm": 1.6080292463302612,
      "learning_rate": 0.00018420096035339452,
      "loss": 2.2465,
      "step": 47
    },
    {
      "epoch": 0.2233207327711544,
      "grad_norm": 1.536863923072815,
      "learning_rate": 0.00018336023852211195,
      "loss": 2.1152,
      "step": 48
    },
    {
      "epoch": 0.22797324803722013,
      "grad_norm": 1.4842332601547241,
      "learning_rate": 0.00018249974745983023,
      "loss": 2.0389,
      "step": 49
    },
    {
      "epoch": 0.23262576330328583,
      "grad_norm": 1.4118927717208862,
      "learning_rate": 0.0001816196912356222,
      "loss": 2.1003,
      "step": 50
    },
    {
      "epoch": 0.23262576330328583,
      "eval_loss": 2.0300514698028564,
      "eval_runtime": 20.9081,
      "eval_samples_per_second": 34.628,
      "eval_steps_per_second": 8.657,
      "step": 50
    },
    {
      "epoch": 0.23727827856935155,
      "grad_norm": 1.608886957168579,
      "learning_rate": 0.00018072027855852097,
      "loss": 2.2955,
      "step": 51
    },
    {
      "epoch": 0.24193079383541727,
      "grad_norm": 1.4855011701583862,
      "learning_rate": 0.000179801722728024,
      "loss": 1.9307,
      "step": 52
    },
    {
      "epoch": 0.246583309101483,
      "grad_norm": 1.465166449546814,
      "learning_rate": 0.00017886424158350782,
      "loss": 1.9173,
      "step": 53
    },
    {
      "epoch": 0.2512358243675487,
      "grad_norm": 1.79639732837677,
      "learning_rate": 0.00017790805745256704,
      "loss": 2.0639,
      "step": 54
    },
    {
      "epoch": 0.2558883396336144,
      "grad_norm": 1.683768630027771,
      "learning_rate": 0.00017693339709828792,
      "loss": 1.9898,
      "step": 55
    },
    {
      "epoch": 0.26054085489968015,
      "grad_norm": 1.7183611392974854,
      "learning_rate": 0.00017594049166547073,
      "loss": 2.0649,
      "step": 56
    },
    {
      "epoch": 0.26519337016574585,
      "grad_norm": 1.4495972394943237,
      "learning_rate": 0.00017492957662581295,
      "loss": 2.0176,
      "step": 57
    },
    {
      "epoch": 0.2698458854318116,
      "grad_norm": 1.500927448272705,
      "learning_rate": 0.00017390089172206592,
      "loss": 2.0764,
      "step": 58
    },
    {
      "epoch": 0.2744984006978773,
      "grad_norm": 1.5012840032577515,
      "learning_rate": 0.00017285468091117904,
      "loss": 1.8248,
      "step": 59
    },
    {
      "epoch": 0.279150915963943,
      "grad_norm": 1.5936579704284668,
      "learning_rate": 0.0001717911923064442,
      "loss": 2.0497,
      "step": 60
    },
    {
      "epoch": 0.28380343123000873,
      "grad_norm": 1.5540063381195068,
      "learning_rate": 0.00017071067811865476,
      "loss": 1.833,
      "step": 61
    },
    {
      "epoch": 0.2884559464960744,
      "grad_norm": 1.512662649154663,
      "learning_rate": 0.0001696133945962927,
      "loss": 1.9966,
      "step": 62
    },
    {
      "epoch": 0.29310846176214017,
      "grad_norm": 1.5647892951965332,
      "learning_rate": 0.00016849960196475806,
      "loss": 2.0752,
      "step": 63
    },
    {
      "epoch": 0.29776097702820586,
      "grad_norm": 1.5723665952682495,
      "learning_rate": 0.00016736956436465573,
      "loss": 1.9368,
      "step": 64
    },
    {
      "epoch": 0.3024134922942716,
      "grad_norm": 1.7973029613494873,
      "learning_rate": 0.00016622354978915304,
      "loss": 1.9789,
      "step": 65
    },
    {
      "epoch": 0.3070660075603373,
      "grad_norm": 1.3695393800735474,
      "learning_rate": 0.0001650618300204242,
      "loss": 1.7856,
      "step": 66
    },
    {
      "epoch": 0.311718522826403,
      "grad_norm": 1.7126473188400269,
      "learning_rate": 0.00016388468056519612,
      "loss": 1.8566,
      "step": 67
    },
    {
      "epoch": 0.31637103809246875,
      "grad_norm": 1.408624529838562,
      "learning_rate": 0.0001626923805894107,
      "loss": 1.6383,
      "step": 68
    },
    {
      "epoch": 0.32102355335853444,
      "grad_norm": 1.6226450204849243,
      "learning_rate": 0.00016148521285201927,
      "loss": 1.8517,
      "step": 69
    },
    {
      "epoch": 0.3256760686246002,
      "grad_norm": 1.6124584674835205,
      "learning_rate": 0.00016026346363792567,
      "loss": 1.7469,
      "step": 70
    },
    {
      "epoch": 0.3303285838906659,
      "grad_norm": 1.7233102321624756,
      "learning_rate": 0.00015902742269009197,
      "loss": 1.7507,
      "step": 71
    },
    {
      "epoch": 0.33498109915673163,
      "grad_norm": 1.468773603439331,
      "learning_rate": 0.00015777738314082514,
      "loss": 1.7616,
      "step": 72
    },
    {
      "epoch": 0.3396336144227973,
      "grad_norm": 1.9513323307037354,
      "learning_rate": 0.0001565136414422592,
      "loss": 2.0448,
      "step": 73
    },
    {
      "epoch": 0.344286129688863,
      "grad_norm": 1.96356999874115,
      "learning_rate": 0.0001552364972960506,
      "loss": 1.9519,
      "step": 74
    },
    {
      "epoch": 0.34893864495492877,
      "grad_norm": 1.7340761423110962,
      "learning_rate": 0.0001539462535823025,
      "loss": 1.9677,
      "step": 75
    },
    {
      "epoch": 0.35359116022099446,
      "grad_norm": 1.6672484874725342,
      "learning_rate": 0.0001526432162877356,
      "loss": 1.9008,
      "step": 76
    },
    {
      "epoch": 0.3582436754870602,
      "grad_norm": 1.7562329769134521,
      "learning_rate": 0.00015132769443312207,
      "loss": 1.834,
      "step": 77
    },
    {
      "epoch": 0.3628961907531259,
      "grad_norm": 1.6138277053833008,
      "learning_rate": 0.00015000000000000001,
      "loss": 2.0238,
      "step": 78
    },
    {
      "epoch": 0.36754870601919165,
      "grad_norm": 1.873212456703186,
      "learning_rate": 0.00014866044785668563,
      "loss": 1.8391,
      "step": 79
    },
    {
      "epoch": 0.37220122128525734,
      "grad_norm": 1.608636736869812,
      "learning_rate": 0.00014730935568360102,
      "loss": 1.6384,
      "step": 80
    },
    {
      "epoch": 0.37685373655132304,
      "grad_norm": 1.6646443605422974,
      "learning_rate": 0.00014594704389793477,
      "loss": 1.8669,
      "step": 81
    },
    {
      "epoch": 0.3815062518173888,
      "grad_norm": 1.522123098373413,
      "learning_rate": 0.00014457383557765386,
      "loss": 1.9653,
      "step": 82
    },
    {
      "epoch": 0.3861587670834545,
      "grad_norm": 1.595712423324585,
      "learning_rate": 0.0001431900563848841,
      "loss": 1.8904,
      "step": 83
    },
    {
      "epoch": 0.39081128234952023,
      "grad_norm": 1.62184739112854,
      "learning_rate": 0.00014179603448867835,
      "loss": 1.7053,
      "step": 84
    },
    {
      "epoch": 0.3954637976155859,
      "grad_norm": 1.5826489925384521,
      "learning_rate": 0.00014039210048718949,
      "loss": 1.9059,
      "step": 85
    },
    {
      "epoch": 0.40011631288165167,
      "grad_norm": 1.608913540840149,
      "learning_rate": 0.00013897858732926793,
      "loss": 1.8071,
      "step": 86
    },
    {
      "epoch": 0.40476882814771736,
      "grad_norm": 1.504594326019287,
      "learning_rate": 0.00013755583023550126,
      "loss": 1.6661,
      "step": 87
    },
    {
      "epoch": 0.40942134341378306,
      "grad_norm": 1.6623263359069824,
      "learning_rate": 0.00013612416661871533,
      "loss": 1.8381,
      "step": 88
    },
    {
      "epoch": 0.4140738586798488,
      "grad_norm": 1.5866388082504272,
      "learning_rate": 0.00013468393600395525,
      "loss": 1.7955,
      "step": 89
    },
    {
      "epoch": 0.4187263739459145,
      "grad_norm": 1.596091389656067,
      "learning_rate": 0.00013323547994796597,
      "loss": 1.7176,
      "step": 90
    },
    {
      "epoch": 0.42337888921198025,
      "grad_norm": 1.6838603019714355,
      "learning_rate": 0.00013177914195819016,
      "loss": 1.7935,
      "step": 91
    },
    {
      "epoch": 0.42803140447804594,
      "grad_norm": 1.6236923933029175,
      "learning_rate": 0.00013031526741130435,
      "loss": 1.7203,
      "step": 92
    },
    {
      "epoch": 0.43268391974411163,
      "grad_norm": 1.6118321418762207,
      "learning_rate": 0.00012884420347131123,
      "loss": 1.851,
      "step": 93
    },
    {
      "epoch": 0.4373364350101774,
      "grad_norm": 1.6473873853683472,
      "learning_rate": 0.0001273662990072083,
      "loss": 1.8701,
      "step": 94
    },
    {
      "epoch": 0.4419889502762431,
      "grad_norm": 1.7001062631607056,
      "learning_rate": 0.00012588190451025207,
      "loss": 1.7731,
      "step": 95
    },
    {
      "epoch": 0.4466414655423088,
      "grad_norm": 1.6646205186843872,
      "learning_rate": 0.00012439137201083773,
      "loss": 2.0302,
      "step": 96
    },
    {
      "epoch": 0.4512939808083745,
      "grad_norm": 1.8173680305480957,
      "learning_rate": 0.0001228950549950134,
      "loss": 1.8191,
      "step": 97
    },
    {
      "epoch": 0.45594649607444027,
      "grad_norm": 1.6642239093780518,
      "learning_rate": 0.00012139330832064974,
      "loss": 1.7608,
      "step": 98
    },
    {
      "epoch": 0.46059901134050596,
      "grad_norm": 1.5319768190383911,
      "learning_rate": 0.00011988648813328367,
      "loss": 1.7914,
      "step": 99
    },
    {
      "epoch": 0.46525152660657165,
      "grad_norm": 1.56061851978302,
      "learning_rate": 0.00011837495178165706,
      "loss": 1.98,
      "step": 100
    },
    {
      "epoch": 0.46525152660657165,
      "eval_loss": 1.8041728734970093,
      "eval_runtime": 20.9031,
      "eval_samples_per_second": 34.636,
      "eval_steps_per_second": 8.659,
      "step": 100
    },
    {
      "epoch": 0.4699040418726374,
      "grad_norm": 1.6357735395431519,
      "learning_rate": 0.00011685905773296992,
      "loss": 1.7414,
      "step": 101
    },
    {
      "epoch": 0.4745565571387031,
      "grad_norm": 1.675748348236084,
      "learning_rate": 0.00011533916548786857,
      "loss": 1.8028,
      "step": 102
    },
    {
      "epoch": 0.47920907240476884,
      "grad_norm": 1.55147385597229,
      "learning_rate": 0.00011381563549518823,
      "loss": 1.5612,
      "step": 103
    },
    {
      "epoch": 0.48386158767083454,
      "grad_norm": 1.6434597969055176,
      "learning_rate": 0.00011228882906647142,
      "loss": 1.8869,
      "step": 104
    },
    {
      "epoch": 0.4885141029369003,
      "grad_norm": 1.544256567955017,
      "learning_rate": 0.00011075910829028115,
      "loss": 1.6021,
      "step": 105
    },
    {
      "epoch": 0.493166618202966,
      "grad_norm": 1.5358442068099976,
      "learning_rate": 0.00010922683594633021,
      "loss": 1.8826,
      "step": 106
    },
    {
      "epoch": 0.49781913346903167,
      "grad_norm": 1.6584852933883667,
      "learning_rate": 0.0001076923754194464,
      "loss": 1.5318,
      "step": 107
    },
    {
      "epoch": 0.5024716487350974,
      "grad_norm": 1.5296037197113037,
      "learning_rate": 0.00010615609061339432,
      "loss": 1.5062,
      "step": 108
    },
    {
      "epoch": 0.5071241640011631,
      "grad_norm": 1.638400912284851,
      "learning_rate": 0.00010461834586457398,
      "loss": 1.5932,
      "step": 109
    },
    {
      "epoch": 0.5117766792672288,
      "grad_norm": 1.72173011302948,
      "learning_rate": 0.00010307950585561706,
      "loss": 1.7414,
      "step": 110
    },
    {
      "epoch": 0.5164291945332946,
      "grad_norm": 1.6859912872314453,
      "learning_rate": 0.00010153993552890069,
      "loss": 1.8711,
      "step": 111
    },
    {
      "epoch": 0.5210817097993603,
      "grad_norm": 1.7842934131622314,
      "learning_rate": 0.0001,
      "loss": 1.6803,
      "step": 112
    },
    {
      "epoch": 0.525734225065426,
      "grad_norm": 1.5940512418746948,
      "learning_rate": 9.846006447109933e-05,
      "loss": 1.5046,
      "step": 113
    },
    {
      "epoch": 0.5303867403314917,
      "grad_norm": 1.9007431268692017,
      "learning_rate": 9.692049414438299e-05,
      "loss": 1.9239,
      "step": 114
    },
    {
      "epoch": 0.5350392555975574,
      "grad_norm": 1.940891146659851,
      "learning_rate": 9.538165413542607e-05,
      "loss": 1.8778,
      "step": 115
    },
    {
      "epoch": 0.5396917708636232,
      "grad_norm": 1.5742617845535278,
      "learning_rate": 9.384390938660572e-05,
      "loss": 1.5219,
      "step": 116
    },
    {
      "epoch": 0.5443442861296889,
      "grad_norm": 1.632001519203186,
      "learning_rate": 9.230762458055363e-05,
      "loss": 1.7918,
      "step": 117
    },
    {
      "epoch": 0.5489968013957546,
      "grad_norm": 1.7606934309005737,
      "learning_rate": 9.077316405366981e-05,
      "loss": 1.6797,
      "step": 118
    },
    {
      "epoch": 0.5536493166618203,
      "grad_norm": 1.8048553466796875,
      "learning_rate": 8.924089170971887e-05,
      "loss": 1.7574,
      "step": 119
    },
    {
      "epoch": 0.558301831927886,
      "grad_norm": 1.7053390741348267,
      "learning_rate": 8.77111709335286e-05,
      "loss": 1.6776,
      "step": 120
    },
    {
      "epoch": 0.5629543471939518,
      "grad_norm": 1.7866935729980469,
      "learning_rate": 8.61843645048118e-05,
      "loss": 1.676,
      "step": 121
    },
    {
      "epoch": 0.5676068624600175,
      "grad_norm": 1.7857139110565186,
      "learning_rate": 8.466083451213144e-05,
      "loss": 1.9838,
      "step": 122
    },
    {
      "epoch": 0.5722593777260832,
      "grad_norm": 1.7687913179397583,
      "learning_rate": 8.314094226703007e-05,
      "loss": 1.9557,
      "step": 123
    },
    {
      "epoch": 0.5769118929921488,
      "grad_norm": 1.9080584049224854,
      "learning_rate": 8.162504821834295e-05,
      "loss": 1.6932,
      "step": 124
    },
    {
      "epoch": 0.5815644082582146,
      "grad_norm": 1.7965078353881836,
      "learning_rate": 8.011351186671637e-05,
      "loss": 1.8605,
      "step": 125
    },
    {
      "epoch": 0.5862169235242803,
      "grad_norm": 1.7524250745773315,
      "learning_rate": 7.860669167935028e-05,
      "loss": 1.7335,
      "step": 126
    },
    {
      "epoch": 0.590869438790346,
      "grad_norm": 1.721587061882019,
      "learning_rate": 7.710494500498662e-05,
      "loss": 1.7249,
      "step": 127
    },
    {
      "epoch": 0.5955219540564117,
      "grad_norm": 1.7121999263763428,
      "learning_rate": 7.560862798916228e-05,
      "loss": 1.7491,
      "step": 128
    },
    {
      "epoch": 0.6001744693224774,
      "grad_norm": 1.5758925676345825,
      "learning_rate": 7.411809548974792e-05,
      "loss": 1.709,
      "step": 129
    },
    {
      "epoch": 0.6048269845885432,
      "grad_norm": 1.5831133127212524,
      "learning_rate": 7.263370099279172e-05,
      "loss": 1.4999,
      "step": 130
    },
    {
      "epoch": 0.6094794998546089,
      "grad_norm": 1.7787184715270996,
      "learning_rate": 7.115579652868878e-05,
      "loss": 1.7472,
      "step": 131
    },
    {
      "epoch": 0.6141320151206746,
      "grad_norm": 1.6525014638900757,
      "learning_rate": 6.968473258869566e-05,
      "loss": 1.7104,
      "step": 132
    },
    {
      "epoch": 0.6187845303867403,
      "grad_norm": 1.8373721837997437,
      "learning_rate": 6.822085804180984e-05,
      "loss": 1.8997,
      "step": 133
    },
    {
      "epoch": 0.623437045652806,
      "grad_norm": 1.6728414297103882,
      "learning_rate": 6.676452005203406e-05,
      "loss": 1.5062,
      "step": 134
    },
    {
      "epoch": 0.6280895609188718,
      "grad_norm": 1.663562297821045,
      "learning_rate": 6.531606399604473e-05,
      "loss": 1.7005,
      "step": 135
    },
    {
      "epoch": 0.6327420761849375,
      "grad_norm": 1.7047648429870605,
      "learning_rate": 6.387583338128471e-05,
      "loss": 1.6651,
      "step": 136
    },
    {
      "epoch": 0.6373945914510032,
      "grad_norm": 1.8546326160430908,
      "learning_rate": 6.244416976449875e-05,
      "loss": 1.7414,
      "step": 137
    },
    {
      "epoch": 0.6420471067170689,
      "grad_norm": 1.8506556749343872,
      "learning_rate": 6.102141267073207e-05,
      "loss": 1.5764,
      "step": 138
    },
    {
      "epoch": 0.6466996219831347,
      "grad_norm": 1.863836407661438,
      "learning_rate": 5.960789951281052e-05,
      "loss": 1.8554,
      "step": 139
    },
    {
      "epoch": 0.6513521372492004,
      "grad_norm": 1.5789958238601685,
      "learning_rate": 5.82039655113217e-05,
      "loss": 1.7332,
      "step": 140
    },
    {
      "epoch": 0.6560046525152661,
      "grad_norm": 1.8007911443710327,
      "learning_rate": 5.680994361511591e-05,
      "loss": 1.7878,
      "step": 141
    },
    {
      "epoch": 0.6606571677813318,
      "grad_norm": 1.7672853469848633,
      "learning_rate": 5.542616442234618e-05,
      "loss": 1.59,
      "step": 142
    },
    {
      "epoch": 0.6653096830473975,
      "grad_norm": 1.6216075420379639,
      "learning_rate": 5.4052956102065246e-05,
      "loss": 1.4842,
      "step": 143
    },
    {
      "epoch": 0.6699621983134633,
      "grad_norm": 1.8410775661468506,
      "learning_rate": 5.269064431639901e-05,
      "loss": 1.6629,
      "step": 144
    },
    {
      "epoch": 0.674614713579529,
      "grad_norm": 1.8751968145370483,
      "learning_rate": 5.1339552143314384e-05,
      "loss": 1.6441,
      "step": 145
    },
    {
      "epoch": 0.6792672288455947,
      "grad_norm": 1.913163661956787,
      "learning_rate": 5.000000000000002e-05,
      "loss": 1.6159,
      "step": 146
    },
    {
      "epoch": 0.6839197441116603,
      "grad_norm": 1.6089226007461548,
      "learning_rate": 4.8672305566877964e-05,
      "loss": 1.6247,
      "step": 147
    },
    {
      "epoch": 0.688572259377726,
      "grad_norm": 1.820172667503357,
      "learning_rate": 4.735678371226441e-05,
      "loss": 1.8709,
      "step": 148
    },
    {
      "epoch": 0.6932247746437918,
      "grad_norm": 1.7547824382781982,
      "learning_rate": 4.605374641769752e-05,
      "loss": 1.4424,
      "step": 149
    },
    {
      "epoch": 0.6978772899098575,
      "grad_norm": 1.677914023399353,
      "learning_rate": 4.476350270394942e-05,
      "loss": 1.7515,
      "step": 150
    },
    {
      "epoch": 0.6978772899098575,
      "eval_loss": 1.6791523694992065,
      "eval_runtime": 20.9234,
      "eval_samples_per_second": 34.602,
      "eval_steps_per_second": 8.651,
      "step": 150
    },
    {
      "epoch": 0.7025298051759232,
      "grad_norm": 1.8747222423553467,
      "learning_rate": 4.3486358557740814e-05,
      "loss": 1.8838,
      "step": 151
    },
    {
      "epoch": 0.7071823204419889,
      "grad_norm": 1.7761543989181519,
      "learning_rate": 4.222261685917489e-05,
      "loss": 1.6168,
      "step": 152
    },
    {
      "epoch": 0.7118348357080546,
      "grad_norm": 1.6737406253814697,
      "learning_rate": 4.0972577309908056e-05,
      "loss": 1.6401,
      "step": 153
    },
    {
      "epoch": 0.7164873509741204,
      "grad_norm": 1.9486554861068726,
      "learning_rate": 3.973653636207437e-05,
      "loss": 1.8098,
      "step": 154
    },
    {
      "epoch": 0.7211398662401861,
      "grad_norm": 1.7520830631256104,
      "learning_rate": 3.851478714798076e-05,
      "loss": 1.7116,
      "step": 155
    },
    {
      "epoch": 0.7257923815062518,
      "grad_norm": 1.7359018325805664,
      "learning_rate": 3.7307619410589376e-05,
      "loss": 1.8196,
      "step": 156
    },
    {
      "epoch": 0.7304448967723175,
      "grad_norm": 1.5514014959335327,
      "learning_rate": 3.6115319434803894e-05,
      "loss": 1.3944,
      "step": 157
    },
    {
      "epoch": 0.7350974120383833,
      "grad_norm": 1.8193638324737549,
      "learning_rate": 3.493816997957582e-05,
      "loss": 1.8875,
      "step": 158
    },
    {
      "epoch": 0.739749927304449,
      "grad_norm": 1.7850615978240967,
      "learning_rate": 3.377645021084701e-05,
      "loss": 1.551,
      "step": 159
    },
    {
      "epoch": 0.7444024425705147,
      "grad_norm": 1.8897948265075684,
      "learning_rate": 3.263043563534428e-05,
      "loss": 1.6699,
      "step": 160
    },
    {
      "epoch": 0.7490549578365804,
      "grad_norm": 1.7700817584991455,
      "learning_rate": 3.150039803524194e-05,
      "loss": 1.5546,
      "step": 161
    },
    {
      "epoch": 0.7537074731026461,
      "grad_norm": 1.7360986471176147,
      "learning_rate": 3.0386605403707346e-05,
      "loss": 1.8158,
      "step": 162
    },
    {
      "epoch": 0.7583599883687119,
      "grad_norm": 1.7811108827590942,
      "learning_rate": 2.9289321881345254e-05,
      "loss": 1.7055,
      "step": 163
    },
    {
      "epoch": 0.7630125036347776,
      "grad_norm": 1.7199565172195435,
      "learning_rate": 2.8208807693555818e-05,
      "loss": 1.7042,
      "step": 164
    },
    {
      "epoch": 0.7676650189008433,
      "grad_norm": 1.774168610572815,
      "learning_rate": 2.7145319088820987e-05,
      "loss": 1.622,
      "step": 165
    },
    {
      "epoch": 0.772317534166909,
      "grad_norm": 1.7940188646316528,
      "learning_rate": 2.6099108277934103e-05,
      "loss": 1.5445,
      "step": 166
    },
    {
      "epoch": 0.7769700494329747,
      "grad_norm": 1.5857652425765991,
      "learning_rate": 2.507042337418707e-05,
      "loss": 1.5953,
      "step": 167
    },
    {
      "epoch": 0.7816225646990405,
      "grad_norm": 1.7121484279632568,
      "learning_rate": 2.405950833452928e-05,
      "loss": 1.5087,
      "step": 168
    },
    {
      "epoch": 0.7862750799651061,
      "grad_norm": 1.9980762004852295,
      "learning_rate": 2.3066602901712108e-05,
      "loss": 1.8725,
      "step": 169
    },
    {
      "epoch": 0.7909275952311718,
      "grad_norm": 1.7364083528518677,
      "learning_rate": 2.2091942547432955e-05,
      "loss": 1.7714,
      "step": 170
    },
    {
      "epoch": 0.7955801104972375,
      "grad_norm": 1.6725072860717773,
      "learning_rate": 2.113575841649217e-05,
      "loss": 1.4332,
      "step": 171
    },
    {
      "epoch": 0.8002326257633033,
      "grad_norm": 1.9727587699890137,
      "learning_rate": 2.0198277271976052e-05,
      "loss": 1.6559,
      "step": 172
    },
    {
      "epoch": 0.804885141029369,
      "grad_norm": 1.7346326112747192,
      "learning_rate": 1.927972144147905e-05,
      "loss": 1.5057,
      "step": 173
    },
    {
      "epoch": 0.8095376562954347,
      "grad_norm": 1.7290587425231934,
      "learning_rate": 1.8380308764377842e-05,
      "loss": 1.6469,
      "step": 174
    },
    {
      "epoch": 0.8141901715615004,
      "grad_norm": 1.8323500156402588,
      "learning_rate": 1.750025254016978e-05,
      "loss": 1.4771,
      "step": 175
    },
    {
      "epoch": 0.8188426868275661,
      "grad_norm": 1.9538246393203735,
      "learning_rate": 1.663976147788806e-05,
      "loss": 1.7257,
      "step": 176
    },
    {
      "epoch": 0.8234952020936319,
      "grad_norm": 1.7197211980819702,
      "learning_rate": 1.5799039646605486e-05,
      "loss": 1.6527,
      "step": 177
    },
    {
      "epoch": 0.8281477173596976,
      "grad_norm": 1.805812120437622,
      "learning_rate": 1.4978286427038601e-05,
      "loss": 1.5049,
      "step": 178
    },
    {
      "epoch": 0.8328002326257633,
      "grad_norm": 1.8770802021026611,
      "learning_rate": 1.4177696464263723e-05,
      "loss": 1.7229,
      "step": 179
    },
    {
      "epoch": 0.837452747891829,
      "grad_norm": 1.6201673746109009,
      "learning_rate": 1.339745962155613e-05,
      "loss": 1.4748,
      "step": 180
    },
    {
      "epoch": 0.8421052631578947,
      "grad_norm": 1.7318499088287354,
      "learning_rate": 1.2637760935363053e-05,
      "loss": 1.5424,
      "step": 181
    },
    {
      "epoch": 0.8467577784239605,
      "grad_norm": 1.6393368244171143,
      "learning_rate": 1.1898780571421552e-05,
      "loss": 1.5458,
      "step": 182
    },
    {
      "epoch": 0.8514102936900262,
      "grad_norm": 1.7201930284500122,
      "learning_rate": 1.1180693782031516e-05,
      "loss": 1.7075,
      "step": 183
    },
    {
      "epoch": 0.8560628089560919,
      "grad_norm": 1.825761079788208,
      "learning_rate": 1.0483670864493778e-05,
      "loss": 1.8075,
      "step": 184
    },
    {
      "epoch": 0.8607153242221576,
      "grad_norm": 1.7473479509353638,
      "learning_rate": 9.807877120723396e-06,
      "loss": 1.5959,
      "step": 185
    },
    {
      "epoch": 0.8653678394882233,
      "grad_norm": 1.7737841606140137,
      "learning_rate": 9.153472818047625e-06,
      "loss": 1.4923,
      "step": 186
    },
    {
      "epoch": 0.8700203547542891,
      "grad_norm": 1.8927415609359741,
      "learning_rate": 8.520613151197898e-06,
      "loss": 1.5957,
      "step": 187
    },
    {
      "epoch": 0.8746728700203548,
      "grad_norm": 1.8504620790481567,
      "learning_rate": 7.909448205504632e-06,
      "loss": 1.7199,
      "step": 188
    },
    {
      "epoch": 0.8793253852864205,
      "grad_norm": 1.9773368835449219,
      "learning_rate": 7.320122921303962e-06,
      "loss": 1.673,
      "step": 189
    },
    {
      "epoch": 0.8839779005524862,
      "grad_norm": 1.729998230934143,
      "learning_rate": 6.75277705956443e-06,
      "loss": 1.6096,
      "step": 190
    },
    {
      "epoch": 0.888630415818552,
      "grad_norm": 1.628357172012329,
      "learning_rate": 6.2075451687422124e-06,
      "loss": 1.4744,
      "step": 191
    },
    {
      "epoch": 0.8932829310846176,
      "grad_norm": 1.7737938165664673,
      "learning_rate": 5.684556552872256e-06,
      "loss": 1.8465,
      "step": 192
    },
    {
      "epoch": 0.8979354463506833,
      "grad_norm": 1.7610527276992798,
      "learning_rate": 5.183935240903414e-06,
      "loss": 1.5833,
      "step": 193
    },
    {
      "epoch": 0.902587961616749,
      "grad_norm": 1.850711703300476,
      "learning_rate": 4.705799957284351e-06,
      "loss": 1.6079,
      "step": 194
    },
    {
      "epoch": 0.9072404768828147,
      "grad_norm": 1.5661609172821045,
      "learning_rate": 4.250264093807565e-06,
      "loss": 1.4349,
      "step": 195
    },
    {
      "epoch": 0.9118929921488805,
      "grad_norm": 1.8847752809524536,
      "learning_rate": 3.817435682718096e-06,
      "loss": 1.6189,
      "step": 196
    },
    {
      "epoch": 0.9165455074149462,
      "grad_norm": 1.8437247276306152,
      "learning_rate": 3.40741737109318e-06,
      "loss": 1.6144,
      "step": 197
    },
    {
      "epoch": 0.9211980226810119,
      "grad_norm": 1.9048221111297607,
      "learning_rate": 3.0203063964990617e-06,
      "loss": 1.6744,
      "step": 198
    },
    {
      "epoch": 0.9258505379470776,
      "grad_norm": 1.8907570838928223,
      "learning_rate": 2.656194563930714e-06,
      "loss": 1.7463,
      "step": 199
    },
    {
      "epoch": 0.9305030532131433,
      "grad_norm": 1.9094120264053345,
      "learning_rate": 2.315168224039932e-06,
      "loss": 1.8705,
      "step": 200
    },
    {
      "epoch": 0.9305030532131433,
      "eval_loss": 1.6292288303375244,
      "eval_runtime": 20.9189,
      "eval_samples_per_second": 34.61,
      "eval_steps_per_second": 8.652,
      "step": 200
    },
    {
      "epoch": 0.9351555684792091,
      "grad_norm": 1.598107933998108,
      "learning_rate": 1.9973082526568154e-06,
      "loss": 1.4567,
      "step": 201
    },
    {
      "epoch": 0.9398080837452748,
      "grad_norm": 2.187147855758667,
      "learning_rate": 1.7026900316098215e-06,
      "loss": 1.6859,
      "step": 202
    },
    {
      "epoch": 0.9444605990113405,
      "grad_norm": 1.804359793663025,
      "learning_rate": 1.4313834308486097e-06,
      "loss": 1.614,
      "step": 203
    },
    {
      "epoch": 0.9491131142774062,
      "grad_norm": 1.7503759860992432,
      "learning_rate": 1.1834527918740623e-06,
      "loss": 1.68,
      "step": 204
    },
    {
      "epoch": 0.953765629543472,
      "grad_norm": 2.0529308319091797,
      "learning_rate": 9.589569124794916e-07,
      "loss": 1.7563,
      "step": 205
    },
    {
      "epoch": 0.9584181448095377,
      "grad_norm": 1.7820945978164673,
      "learning_rate": 7.579490328064265e-07,
      "loss": 1.516,
      "step": 206
    },
    {
      "epoch": 0.9630706600756034,
      "grad_norm": 1.8575091361999512,
      "learning_rate": 5.804768227185565e-07,
      "loss": 1.6248,
      "step": 207
    },
    {
      "epoch": 0.9677231753416691,
      "grad_norm": 1.8180886507034302,
      "learning_rate": 4.2658237049655323e-07,
      "loss": 1.6101,
      "step": 208
    },
    {
      "epoch": 0.9723756906077348,
      "grad_norm": 1.6702853441238403,
      "learning_rate": 2.963021728567106e-07,
      "loss": 1.5597,
      "step": 209
    },
    {
      "epoch": 0.9770282058738006,
      "grad_norm": 1.678638219833374,
      "learning_rate": 1.8966712629558957e-07,
      "loss": 1.5329,
      "step": 210
    },
    {
      "epoch": 0.9816807211398663,
      "grad_norm": 1.6849240064620972,
      "learning_rate": 1.0670251976275803e-07,
      "loss": 1.5622,
      "step": 211
    },
    {
      "epoch": 0.986333236405932,
      "grad_norm": 1.7889765501022339,
      "learning_rate": 4.74280286634099e-08,
      "loss": 1.4413,
      "step": 212
    },
    {
      "epoch": 0.9909857516719977,
      "grad_norm": 1.7154433727264404,
      "learning_rate": 1.1857710192308969e-08,
      "loss": 1.5593,
      "step": 213
    },
    {
      "epoch": 0.9956382669380633,
      "grad_norm": 1.744558572769165,
      "learning_rate": 0.0,
      "loss": 1.5847,
      "step": 214
    }
  ],
  "logging_steps": 1,
  "max_steps": 214,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 30,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.1181661243205222e+17,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}