{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 200,
  "global_step": 729,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.004117344312918168,
      "grad_norm": 3.3125,
      "learning_rate": 0.0,
      "loss": 1.7239,
      "step": 1
    },
    {
      "epoch": 0.020586721564590838,
      "grad_norm": 2.265625,
      "learning_rate": 1.0958904109589042e-05,
      "loss": 1.725,
      "step": 5
    },
    {
      "epoch": 0.041173443129181676,
      "grad_norm": 1.6640625,
      "learning_rate": 2.4657534246575342e-05,
      "loss": 1.6554,
      "step": 10
    },
    {
      "epoch": 0.06176016469377252,
      "grad_norm": 1.65625,
      "learning_rate": 3.8356164383561644e-05,
      "loss": 1.5948,
      "step": 15
    },
    {
      "epoch": 0.08234688625836335,
      "grad_norm": 1.8203125,
      "learning_rate": 5.2054794520547945e-05,
      "loss": 1.5621,
      "step": 20
    },
    {
      "epoch": 0.1029336078229542,
      "grad_norm": 1.8359375,
      "learning_rate": 6.575342465753424e-05,
      "loss": 1.5373,
      "step": 25
    },
    {
      "epoch": 0.12352032938754504,
      "grad_norm": 1.625,
      "learning_rate": 7.945205479452055e-05,
      "loss": 1.4909,
      "step": 30
    },
    {
      "epoch": 0.14410705095213588,
      "grad_norm": 1.3046875,
      "learning_rate": 9.315068493150684e-05,
      "loss": 1.4503,
      "step": 35
    },
    {
      "epoch": 0.1646937725167267,
      "grad_norm": 1.125,
      "learning_rate": 0.00010684931506849317,
      "loss": 1.4406,
      "step": 40
    },
    {
      "epoch": 0.18528049408131755,
      "grad_norm": 1.09375,
      "learning_rate": 0.00012054794520547945,
      "loss": 1.446,
      "step": 45
    },
    {
      "epoch": 0.2058672156459084,
      "grad_norm": 1.5078125,
      "learning_rate": 0.00013424657534246576,
      "loss": 1.4431,
      "step": 50
    },
    {
      "epoch": 0.22645393721049922,
      "grad_norm": 1.0078125,
      "learning_rate": 0.00014794520547945205,
      "loss": 1.4659,
      "step": 55
    },
    {
      "epoch": 0.24704065877509007,
      "grad_norm": 1.359375,
      "learning_rate": 0.00016164383561643837,
      "loss": 1.4371,
      "step": 60
    },
    {
      "epoch": 0.2676273803396809,
      "grad_norm": 1.2109375,
      "learning_rate": 0.00017534246575342467,
      "loss": 1.4631,
      "step": 65
    },
    {
      "epoch": 0.28821410190427177,
      "grad_norm": 1.4765625,
      "learning_rate": 0.00018904109589041096,
      "loss": 1.4515,
      "step": 70
    },
    {
      "epoch": 0.30880082346886256,
      "grad_norm": 1.1953125,
      "learning_rate": 0.00019999885326982427,
      "loss": 1.4602,
      "step": 75
    },
    {
      "epoch": 0.3293875450334534,
      "grad_norm": 0.9609375,
      "learning_rate": 0.00019995872047508514,
      "loss": 1.4369,
      "step": 80
    },
    {
      "epoch": 0.34997426659804426,
      "grad_norm": 0.953125,
      "learning_rate": 0.00019986127746865,
      "loss": 1.454,
      "step": 85
    },
    {
      "epoch": 0.3705609881626351,
      "grad_norm": 0.8671875,
      "learning_rate": 0.00019970658011837404,
      "loss": 1.4406,
      "step": 90
    },
    {
      "epoch": 0.39114770972722596,
      "grad_norm": 0.8046875,
      "learning_rate": 0.00019949471711824872,
      "loss": 1.445,
      "step": 95
    },
    {
      "epoch": 0.4117344312918168,
      "grad_norm": 0.83203125,
      "learning_rate": 0.0001992258099375498,
      "loss": 1.4431,
      "step": 100
    },
    {
      "epoch": 0.4323211528564076,
      "grad_norm": 0.68359375,
      "learning_rate": 0.00019890001275119453,
      "loss": 1.46,
      "step": 105
    },
    {
      "epoch": 0.45290787442099845,
      "grad_norm": 0.86328125,
      "learning_rate": 0.000198517512351347,
      "loss": 1.4477,
      "step": 110
    },
    {
      "epoch": 0.4734945959855893,
      "grad_norm": 0.70703125,
      "learning_rate": 0.00019807852804032305,
      "loss": 1.4202,
      "step": 115
    },
    {
      "epoch": 0.49408131755018014,
      "grad_norm": 0.69921875,
      "learning_rate": 0.00019758331150485575,
      "loss": 1.4393,
      "step": 120
    },
    {
      "epoch": 0.514668039114771,
      "grad_norm": 0.69921875,
      "learning_rate": 0.00019703214667179353,
      "loss": 1.4277,
      "step": 125
    },
    {
      "epoch": 0.5352547606793618,
      "grad_norm": 0.7578125,
      "learning_rate": 0.0001964253495453141,
      "loss": 1.4421,
      "step": 130
    },
    {
      "epoch": 0.5558414822439527,
      "grad_norm": 0.61328125,
      "learning_rate": 0.0001957632680257468,
      "loss": 1.4553,
      "step": 135
    },
    {
      "epoch": 0.5764282038085435,
      "grad_norm": 0.6640625,
      "learning_rate": 0.0001950462817101079,
      "loss": 1.4364,
      "step": 140
    },
    {
      "epoch": 0.5970149253731343,
      "grad_norm": 0.6171875,
      "learning_rate": 0.000194274801674463,
      "loss": 1.4269,
      "step": 145
    },
    {
      "epoch": 0.6176016469377251,
      "grad_norm": 0.625,
      "learning_rate": 0.0001934492702382411,
      "loss": 1.4327,
      "step": 150
    },
    {
      "epoch": 0.638188368502316,
      "grad_norm": 0.6640625,
      "learning_rate": 0.0001925701607106357,
      "loss": 1.4219,
      "step": 155
    },
    {
      "epoch": 0.6587750900669068,
      "grad_norm": 0.6796875,
      "learning_rate": 0.00019163797711923823,
      "loss": 1.417,
      "step": 160
    },
    {
      "epoch": 0.6793618116314977,
      "grad_norm": 0.64453125,
      "learning_rate": 0.0001906532539210597,
      "loss": 1.4277,
      "step": 165
    },
    {
      "epoch": 0.6999485331960885,
      "grad_norm": 0.62109375,
      "learning_rate": 0.00018961655569610557,
      "loss": 1.407,
      "step": 170
    },
    {
      "epoch": 0.7205352547606794,
      "grad_norm": 0.640625,
      "learning_rate": 0.0001885284768236801,
      "loss": 1.4221,
      "step": 175
    },
    {
      "epoch": 0.7411219763252702,
      "grad_norm": 0.58984375,
      "learning_rate": 0.00018738964114160583,
      "loss": 1.4035,
      "step": 180
    },
    {
      "epoch": 0.7617086978898611,
      "grad_norm": 0.58984375,
      "learning_rate": 0.00018620070158855301,
      "loss": 1.4082,
      "step": 185
    },
    {
      "epoch": 0.7822954194544519,
      "grad_norm": 0.59375,
      "learning_rate": 0.00018496233982968457,
      "loss": 1.408,
      "step": 190
    },
    {
      "epoch": 0.8028821410190428,
      "grad_norm": 0.58984375,
      "learning_rate": 0.00018367526586583098,
      "loss": 1.4207,
      "step": 195
    },
    {
      "epoch": 0.8234688625836336,
      "grad_norm": 0.62109375,
      "learning_rate": 0.00018234021762641945,
      "loss": 1.4151,
      "step": 200
    },
    {
      "epoch": 0.8234688625836336,
      "eval_loss": 1.4270434379577637,
      "eval_runtime": 300.5446,
      "eval_samples_per_second": 22.978,
      "eval_steps_per_second": 1.437,
      "step": 200
    },
    {
      "epoch": 0.8440555841482243,
      "grad_norm": 0.65234375,
      "learning_rate": 0.00018095796054639013,
      "loss": 1.4259,
      "step": 205
    },
    {
      "epoch": 0.8646423057128152,
      "grad_norm": 0.57421875,
      "learning_rate": 0.00017952928712734268,
      "loss": 1.3941,
      "step": 210
    },
    {
      "epoch": 0.885229027277406,
      "grad_norm": 0.59765625,
      "learning_rate": 0.00017805501648316424,
      "loss": 1.3959,
      "step": 215
    },
    {
      "epoch": 0.9058157488419969,
      "grad_norm": 0.609375,
      "learning_rate": 0.00017653599387039992,
      "loss": 1.4122,
      "step": 220
    },
    {
      "epoch": 0.9264024704065877,
      "grad_norm": 0.62109375,
      "learning_rate": 0.00017497309020363417,
      "loss": 1.4074,
      "step": 225
    },
    {
      "epoch": 0.9469891919711786,
      "grad_norm": 0.54296875,
      "learning_rate": 0.00017336720155616185,
      "loss": 1.421,
      "step": 230
    },
    {
      "epoch": 0.9675759135357694,
      "grad_norm": 0.61328125,
      "learning_rate": 0.00017171924864623454,
      "loss": 1.4146,
      "step": 235
    },
    {
      "epoch": 0.9881626351003603,
      "grad_norm": 0.52734375,
      "learning_rate": 0.0001700301763091771,
      "loss": 1.3834,
      "step": 240
    },
    {
      "epoch": 1.0082346886258364,
      "grad_norm": 0.7890625,
      "learning_rate": 0.00016830095295567693,
      "loss": 1.3598,
      "step": 245
    },
    {
      "epoch": 1.0288214101904272,
      "grad_norm": 0.875,
      "learning_rate": 0.00016653257001655652,
      "loss": 1.2955,
      "step": 250
    },
    {
      "epoch": 1.0494081317550181,
      "grad_norm": 0.5703125,
      "learning_rate": 0.00016472604137434784,
      "loss": 1.2763,
      "step": 255
    },
    {
      "epoch": 1.0699948533196089,
      "grad_norm": 0.6015625,
      "learning_rate": 0.00016288240278199395,
      "loss": 1.2821,
      "step": 260
    },
    {
      "epoch": 1.0905815748841996,
      "grad_norm": 0.5390625,
      "learning_rate": 0.00016100271126901175,
      "loss": 1.2932,
      "step": 265
    },
    {
      "epoch": 1.1111682964487906,
      "grad_norm": 0.54296875,
      "learning_rate": 0.00015908804453545607,
      "loss": 1.2985,
      "step": 270
    },
    {
      "epoch": 1.1317550180133813,
      "grad_norm": 0.54296875,
      "learning_rate": 0.0001571395003340323,
      "loss": 1.2692,
      "step": 275
    },
    {
      "epoch": 1.1523417395779723,
      "grad_norm": 0.5546875,
      "learning_rate": 0.00015515819584071216,
      "loss": 1.2829,
      "step": 280
    },
    {
      "epoch": 1.172928461142563,
      "grad_norm": 0.50390625,
      "learning_rate": 0.0001531452670142135,
      "loss": 1.2881,
      "step": 285
    },
    {
      "epoch": 1.193515182707154,
      "grad_norm": 0.53125,
      "learning_rate": 0.00015110186794471103,
      "loss": 1.2853,
      "step": 290
    },
    {
      "epoch": 1.2141019042717447,
      "grad_norm": 0.578125,
      "learning_rate": 0.00014902917019215164,
      "loss": 1.2629,
      "step": 295
    },
    {
      "epoch": 1.2346886258363357,
      "grad_norm": 0.53515625,
      "learning_rate": 0.00014692836211455373,
      "loss": 1.284,
      "step": 300
    },
    {
      "epoch": 1.2552753474009264,
      "grad_norm": 0.498046875,
      "learning_rate": 0.00014480064818667528,
      "loss": 1.2776,
      "step": 305
    },
    {
      "epoch": 1.2758620689655173,
      "grad_norm": 0.5546875,
      "learning_rate": 0.000142647248309442,
      "loss": 1.2677,
      "step": 310
    },
    {
      "epoch": 1.296448790530108,
      "grad_norm": 0.5625,
      "learning_rate": 0.0001404693971105306,
      "loss": 1.2838,
      "step": 315
    },
    {
      "epoch": 1.3170355120946988,
      "grad_norm": 0.58203125,
      "learning_rate": 0.000138268343236509,
      "loss": 1.2875,
      "step": 320
    },
    {
      "epoch": 1.3376222336592898,
      "grad_norm": 0.5234375,
      "learning_rate": 0.00013604534863693868,
      "loss": 1.2793,
      "step": 325
    },
    {
      "epoch": 1.3582089552238805,
      "grad_norm": 0.52734375,
      "learning_rate": 0.00013380168784085027,
      "loss": 1.289,
      "step": 330
    },
    {
      "epoch": 1.3787956767884715,
      "grad_norm": 0.51953125,
      "learning_rate": 0.0001315386472260063,
      "loss": 1.2902,
      "step": 335
    },
    {
      "epoch": 1.3993823983530622,
      "grad_norm": 0.50390625,
      "learning_rate": 0.00012925752428137125,
      "loss": 1.2931,
      "step": 340
    },
    {
      "epoch": 1.4199691199176532,
      "grad_norm": 0.470703125,
      "learning_rate": 0.0001269596268632105,
      "loss": 1.2757,
      "step": 345
    },
    {
      "epoch": 1.440555841482244,
      "grad_norm": 0.52734375,
      "learning_rate": 0.00012464627244524593,
      "loss": 1.2884,
      "step": 350
    },
    {
      "epoch": 1.4611425630468349,
      "grad_norm": 0.51171875,
      "learning_rate": 0.00012231878736329683,
      "loss": 1.2945,
      "step": 355
    },
    {
      "epoch": 1.4817292846114256,
      "grad_norm": 0.49609375,
      "learning_rate": 0.00011997850605484033,
      "loss": 1.2876,
      "step": 360
    },
    {
      "epoch": 1.5023160061760166,
      "grad_norm": 0.51171875,
      "learning_rate": 0.00011762677029392652,
      "loss": 1.2849,
      "step": 365
    },
    {
      "epoch": 1.5229027277406073,
      "grad_norm": 0.482421875,
      "learning_rate": 0.00011526492842188745,
      "loss": 1.2768,
      "step": 370
    },
    {
      "epoch": 1.543489449305198,
      "grad_norm": 0.49609375,
      "learning_rate": 0.0001128943345742806,
      "loss": 1.2785,
      "step": 375
    },
    {
      "epoch": 1.564076170869789,
      "grad_norm": 0.494140625,
      "learning_rate": 0.0001105163479045106,
      "loss": 1.2818,
      "step": 380
    },
    {
      "epoch": 1.58466289243438,
      "grad_norm": 0.462890625,
      "learning_rate": 0.00010813233180457367,
      "loss": 1.2722,
      "step": 385
    },
    {
      "epoch": 1.6052496139989707,
      "grad_norm": 0.470703125,
      "learning_rate": 0.00010574365312337235,
      "loss": 1.2864,
      "step": 390
    },
    {
      "epoch": 1.6258363355635614,
      "grad_norm": 0.451171875,
      "learning_rate": 0.00010335168138304776,
      "loss": 1.2681,
      "step": 395
    },
    {
      "epoch": 1.6464230571281524,
      "grad_norm": 0.5,
      "learning_rate": 0.00010095778799377959,
      "loss": 1.2891,
      "step": 400
    },
    {
      "epoch": 1.6464230571281524,
      "eval_loss": 1.3904892206192017,
      "eval_runtime": 300.5338,
      "eval_samples_per_second": 22.979,
      "eval_steps_per_second": 1.437,
      "step": 400
    },
    {
      "epoch": 1.6670097786927431,
      "grad_norm": 0.478515625,
      "learning_rate": 9.856334546750349e-05,
      "loss": 1.2842,
      "step": 405
    },
    {
      "epoch": 1.6875965002573339,
      "grad_norm": 0.458984375,
      "learning_rate": 9.616972663099647e-05,
      "loss": 1.2671,
      "step": 410
    },
    {
      "epoch": 1.7081832218219248,
      "grad_norm": 0.45703125,
      "learning_rate": 9.377830383878246e-05,
      "loss": 1.2617,
      "step": 415
    },
    {
      "epoch": 1.7287699433865158,
      "grad_norm": 0.453125,
      "learning_rate": 9.139044818630784e-05,
      "loss": 1.2609,
      "step": 420
    },
    {
      "epoch": 1.7493566649511065,
      "grad_norm": 0.435546875,
      "learning_rate": 8.90075287238395e-05,
      "loss": 1.2686,
      "step": 425
    },
    {
      "epoch": 1.7699433865156973,
      "grad_norm": 0.458984375,
      "learning_rate": 8.663091167153516e-05,
      "loss": 1.2603,
      "step": 430
    },
    {
      "epoch": 1.7905301080802882,
      "grad_norm": 0.4375,
      "learning_rate": 8.426195963613626e-05,
      "loss": 1.2728,
      "step": 435
    },
    {
      "epoch": 1.8111168296448792,
      "grad_norm": 0.4375,
      "learning_rate": 8.190203082973272e-05,
      "loss": 1.2503,
      "step": 440
    },
    {
      "epoch": 1.83170355120947,
      "grad_norm": 0.462890625,
      "learning_rate": 7.955247829104738e-05,
      "loss": 1.2781,
      "step": 445
    },
    {
      "epoch": 1.8522902727740607,
      "grad_norm": 0.435546875,
      "learning_rate": 7.721464910968627e-05,
      "loss": 1.2635,
      "step": 450
    },
    {
      "epoch": 1.8728769943386516,
      "grad_norm": 0.45703125,
      "learning_rate": 7.488988365379984e-05,
      "loss": 1.262,
      "step": 455
    },
    {
      "epoch": 1.8934637159032424,
      "grad_norm": 0.423828125,
      "learning_rate": 7.25795148015982e-05,
      "loss": 1.24,
      "step": 460
    },
    {
      "epoch": 1.914050437467833,
      "grad_norm": 0.431640625,
      "learning_rate": 7.028486717715993e-05,
      "loss": 1.2687,
      "step": 465
    },
    {
      "epoch": 1.934637159032424,
      "grad_norm": 0.443359375,
      "learning_rate": 6.800725639097411e-05,
      "loss": 1.2644,
      "step": 470
    },
    {
      "epoch": 1.955223880597015,
      "grad_norm": 0.4375,
      "learning_rate": 6.574798828564948e-05,
      "loss": 1.2602,
      "step": 475
    },
    {
      "epoch": 1.9758106021616058,
      "grad_norm": 0.4375,
      "learning_rate": 6.35083581872245e-05,
      "loss": 1.271,
      "step": 480
    },
    {
      "epoch": 1.9963973237261965,
      "grad_norm": 0.42578125,
      "learning_rate": 6.128965016250637e-05,
      "loss": 1.253,
      "step": 485
    },
    {
      "epoch": 2.016469377251673,
      "grad_norm": 0.6875,
      "learning_rate": 5.909313628286601e-05,
      "loss": 1.1933,
      "step": 490
    },
    {
      "epoch": 2.0370560988162634,
      "grad_norm": 0.54296875,
      "learning_rate": 5.692007589491014e-05,
      "loss": 1.179,
      "step": 495
    },
    {
      "epoch": 2.0576428203808543,
      "grad_norm": 0.5390625,
      "learning_rate": 5.477171489844881e-05,
      "loss": 1.1669,
      "step": 500
    },
    {
      "epoch": 2.0782295419454453,
      "grad_norm": 0.443359375,
      "learning_rate": 5.2649285032173045e-05,
      "loss": 1.1673,
      "step": 505
    },
    {
      "epoch": 2.0988162635100363,
      "grad_norm": 0.4375,
      "learning_rate": 5.055400316745096e-05,
      "loss": 1.1703,
      "step": 510
    },
    {
      "epoch": 2.1194029850746268,
      "grad_norm": 0.427734375,
      "learning_rate": 4.848707061064849e-05,
      "loss": 1.1856,
      "step": 515
    },
    {
      "epoch": 2.1399897066392177,
      "grad_norm": 0.41015625,
      "learning_rate": 4.64496724143736e-05,
      "loss": 1.1569,
      "step": 520
    },
    {
      "epoch": 2.1605764282038087,
      "grad_norm": 0.419921875,
      "learning_rate": 4.444297669803981e-05,
      "loss": 1.1611,
      "step": 525
    },
    {
      "epoch": 2.181163149768399,
      "grad_norm": 0.421875,
      "learning_rate": 4.2468133978137945e-05,
      "loss": 1.1738,
      "step": 530
    },
    {
      "epoch": 2.20174987133299,
      "grad_norm": 0.40234375,
      "learning_rate": 4.052627650860057e-05,
      "loss": 1.1708,
      "step": 535
    },
    {
      "epoch": 2.222336592897581,
      "grad_norm": 0.39453125,
      "learning_rate": 3.861851763163665e-05,
      "loss": 1.1789,
      "step": 540
    },
    {
      "epoch": 2.242923314462172,
      "grad_norm": 0.416015625,
      "learning_rate": 3.6745951139409395e-05,
      "loss": 1.1662,
      "step": 545
    },
    {
      "epoch": 2.2635100360267626,
      "grad_norm": 0.41015625,
      "learning_rate": 3.4909650646922896e-05,
      "loss": 1.1598,
      "step": 550
    },
    {
      "epoch": 2.2840967575913536,
      "grad_norm": 0.396484375,
      "learning_rate": 3.3110668976476746e-05,
      "loss": 1.1604,
      "step": 555
    },
    {
      "epoch": 2.3046834791559445,
      "grad_norm": 0.40234375,
      "learning_rate": 3.1350037554042446e-05,
      "loss": 1.1548,
      "step": 560
    },
    {
      "epoch": 2.325270200720535,
      "grad_norm": 0.388671875,
      "learning_rate": 2.9628765817906667e-05,
      "loss": 1.1779,
      "step": 565
    },
    {
      "epoch": 2.345856922285126,
      "grad_norm": 0.3984375,
      "learning_rate": 2.794784063992131e-05,
      "loss": 1.1664,
      "step": 570
    },
    {
      "epoch": 2.366443643849717,
      "grad_norm": 0.404296875,
      "learning_rate": 2.6308225759691428e-05,
      "loss": 1.1706,
      "step": 575
    },
    {
      "epoch": 2.387030365414308,
      "grad_norm": 0.40625,
      "learning_rate": 2.471086123202602e-05,
      "loss": 1.1739,
      "step": 580
    },
    {
      "epoch": 2.4076170869788984,
      "grad_norm": 0.392578125,
      "learning_rate": 2.3156662887968207e-05,
      "loss": 1.1613,
      "step": 585
    },
    {
      "epoch": 2.4282038085434894,
      "grad_norm": 0.390625,
      "learning_rate": 2.164652180971358e-05,
      "loss": 1.1611,
      "step": 590
    },
    {
      "epoch": 2.4487905301080803,
      "grad_norm": 0.392578125,
      "learning_rate": 2.0181303819718457e-05,
      "loss": 1.1646,
      "step": 595
    },
    {
      "epoch": 2.4693772516726713,
      "grad_norm": 0.388671875,
      "learning_rate": 1.8761848984290064e-05,
      "loss": 1.1677,
      "step": 600
    },
    {
      "epoch": 2.4693772516726713,
      "eval_loss": 1.3902530670166016,
      "eval_runtime": 300.5428,
      "eval_samples_per_second": 22.978,
      "eval_steps_per_second": 1.437,
      "step": 600
    },
    {
      "epoch": 2.489963973237262,
      "grad_norm": 0.380859375,
      "learning_rate": 1.7388971131944033e-05,
      "loss": 1.1783,
      "step": 605
    },
    {
      "epoch": 2.510550694801853,
      "grad_norm": 0.39453125,
      "learning_rate": 1.6063457386805004e-05,
      "loss": 1.1652,
      "step": 610
    },
    {
      "epoch": 2.5311374163664437,
      "grad_norm": 0.396484375,
      "learning_rate": 1.4786067717317743e-05,
      "loss": 1.1578,
      "step": 615
    },
    {
      "epoch": 2.5517241379310347,
      "grad_norm": 0.390625,
      "learning_rate": 1.3557534500527769e-05,
      "loss": 1.1665,
      "step": 620
    },
    {
      "epoch": 2.572310859495625,
      "grad_norm": 0.3828125,
      "learning_rate": 1.2378562102181179e-05,
      "loss": 1.1662,
      "step": 625
    },
    {
      "epoch": 2.592897581060216,
      "grad_norm": 0.3828125,
      "learning_rate": 1.1249826472884572e-05,
      "loss": 1.1713,
      "step": 630
    },
    {
      "epoch": 2.613484302624807,
      "grad_norm": 0.39453125,
      "learning_rate": 1.0171974760556114e-05,
      "loss": 1.1724,
      "step": 635
    },
    {
      "epoch": 2.6340710241893976,
      "grad_norm": 0.396484375,
      "learning_rate": 9.145624939390762e-06,
      "loss": 1.1559,
      "step": 640
    },
    {
      "epoch": 2.6546577457539886,
      "grad_norm": 0.380859375,
      "learning_rate": 8.171365455551506e-06,
      "loss": 1.1768,
      "step": 645
    },
    {
      "epoch": 2.6752444673185796,
      "grad_norm": 0.388671875,
      "learning_rate": 7.249754889790539e-06,
      "loss": 1.1638,
      "step": 650
    },
    {
      "epoch": 2.6958311888831705,
      "grad_norm": 0.3828125,
      "learning_rate": 6.38132163719305e-06,
      "loss": 1.1572,
      "step": 655
    },
    {
      "epoch": 2.716417910447761,
      "grad_norm": 0.384765625,
      "learning_rate": 5.56656360422797e-06,
      "loss": 1.1696,
      "step": 660
    },
    {
      "epoch": 2.737004632012352,
      "grad_norm": 0.376953125,
      "learning_rate": 4.805947923278864e-06,
      "loss": 1.1778,
      "step": 665
    },
    {
      "epoch": 2.757591353576943,
      "grad_norm": 0.38671875,
      "learning_rate": 4.099910684818698e-06,
      "loss": 1.177,
      "step": 670
    },
    {
      "epoch": 2.7781780751415335,
      "grad_norm": 0.380859375,
      "learning_rate": 3.4488566873824513e-06,
      "loss": 1.176,
      "step": 675
    },
    {
      "epoch": 2.7987647967061244,
      "grad_norm": 0.3828125,
      "learning_rate": 2.853159205480216e-06,
      "loss": 1.1635,
      "step": 680
    },
    {
      "epoch": 2.8193515182707154,
      "grad_norm": 0.380859375,
      "learning_rate": 2.3131597755845946e-06,
      "loss": 1.1825,
      "step": 685
    },
    {
      "epoch": 2.8399382398353064,
      "grad_norm": 0.3828125,
      "learning_rate": 1.8291680003145073e-06,
      "loss": 1.1639,
      "step": 690
    },
    {
      "epoch": 2.8605249613998973,
      "grad_norm": 0.380859375,
      "learning_rate": 1.4014613709280145e-06,
      "loss": 1.1742,
      "step": 695
    },
    {
      "epoch": 2.881111682964488,
      "grad_norm": 0.384765625,
      "learning_rate": 1.0302851082258369e-06,
      "loss": 1.1423,
      "step": 700
    },
    {
      "epoch": 2.901698404529079,
      "grad_norm": 0.384765625,
      "learning_rate": 7.158520219567533e-07,
      "loss": 1.1591,
      "step": 705
    },
    {
      "epoch": 2.9222851260936697,
      "grad_norm": 0.39453125,
      "learning_rate": 4.583423888055105e-07,
      "loss": 1.1736,
      "step": 710
    },
    {
      "epoch": 2.9428718476582603,
      "grad_norm": 0.384765625,
      "learning_rate": 2.57903849033192e-07,
      "loss": 1.1783,
      "step": 715
    },
    {
      "epoch": 2.9634585692228512,
      "grad_norm": 0.3828125,
      "learning_rate": 1.1465132182936212e-07,
      "loss": 1.173,
      "step": 720
    },
    {
      "epoch": 2.984045290787442,
      "grad_norm": 0.384765625,
      "learning_rate": 2.8666939424415452e-08,
      "loss": 1.1552,
      "step": 725
    },
    {
      "epoch": 3.0,
      "step": 729,
      "total_flos": 1.0094624425786737e+18,
      "train_loss": 1.2982249011391638,
      "train_runtime": 26293.112,
      "train_samples_per_second": 7.093,
      "train_steps_per_second": 0.028
    }
  ],
  "logging_steps": 5,
  "max_steps": 729,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 1000000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.0094624425786737e+18,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}