{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.5945303210463734, "eval_steps": 50, "global_step": 250, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.023781212841854936, "grad_norm": 11.536224365234375, "learning_rate": 3e-05, "loss": 5.0938, "step": 10 }, { "epoch": 0.04756242568370987, "grad_norm": 3.4980711936950684, "learning_rate": 0.00013000000000000002, "loss": 3.1992, "step": 20 }, { "epoch": 0.0713436385255648, "grad_norm": 2.206911325454712, "learning_rate": 0.00019951729686242962, "loss": 2.0348, "step": 30 }, { "epoch": 0.09512485136741974, "grad_norm": 1.7694568634033203, "learning_rate": 0.00019790828640386162, "loss": 1.9145, "step": 40 }, { "epoch": 0.11890606420927467, "grad_norm": 1.6293410062789917, "learning_rate": 0.00019629927594529365, "loss": 1.7651, "step": 50 }, { "epoch": 0.1426872770511296, "grad_norm": 1.8220889568328857, "learning_rate": 0.00019469026548672567, "loss": 1.8897, "step": 60 }, { "epoch": 0.16646848989298454, "grad_norm": 1.842227578163147, "learning_rate": 0.0001930812550281577, "loss": 1.8391, "step": 70 }, { "epoch": 0.1902497027348395, "grad_norm": 1.6302684545516968, "learning_rate": 0.00019147224456958972, "loss": 1.7881, "step": 80 }, { "epoch": 0.2140309155766944, "grad_norm": 1.7633960247039795, "learning_rate": 0.00018986323411102174, "loss": 1.7444, "step": 90 }, { "epoch": 0.23781212841854935, "grad_norm": 1.9522314071655273, "learning_rate": 0.00018825422365245377, "loss": 1.6647, "step": 100 }, { "epoch": 0.2615933412604043, "grad_norm": 1.5591909885406494, "learning_rate": 0.00018664521319388576, "loss": 1.6839, "step": 110 }, { "epoch": 0.2853745541022592, "grad_norm": 1.9646824598312378, "learning_rate": 0.00018503620273531779, "loss": 1.7177, "step": 120 }, { "epoch": 0.3091557669441142, "grad_norm": 2.0045852661132812, "learning_rate": 0.0001834271922767498, "loss": 1.7352, "step": 130 }, { "epoch": 0.3329369797859691, "grad_norm": 1.6761493682861328, "learning_rate": 0.00018181818181818183, "loss": 1.5562, "step": 140 }, { "epoch": 0.356718192627824, "grad_norm": 1.720191478729248, "learning_rate": 0.00018020917135961383, "loss": 1.8305, "step": 150 }, { "epoch": 0.380499405469679, "grad_norm": 1.689537763595581, "learning_rate": 0.00017860016090104586, "loss": 1.6121, "step": 160 }, { "epoch": 0.4042806183115339, "grad_norm": 1.6469579935073853, "learning_rate": 0.0001769911504424779, "loss": 1.5827, "step": 170 }, { "epoch": 0.4280618311533888, "grad_norm": 1.637831449508667, "learning_rate": 0.0001753821399839099, "loss": 1.6039, "step": 180 }, { "epoch": 0.4518430439952438, "grad_norm": 2.1786320209503174, "learning_rate": 0.00017377312952534193, "loss": 1.7008, "step": 190 }, { "epoch": 0.4756242568370987, "grad_norm": 1.634881615638733, "learning_rate": 0.00017216411906677395, "loss": 1.5673, "step": 200 }, { "epoch": 0.4994054696789536, "grad_norm": 1.66987144947052, "learning_rate": 0.00017055510860820595, "loss": 1.5921, "step": 210 }, { "epoch": 0.5231866825208086, "grad_norm": 1.7795851230621338, "learning_rate": 0.00016894609814963797, "loss": 1.7279, "step": 220 }, { "epoch": 0.5469678953626635, "grad_norm": 1.574320673942566, "learning_rate": 0.00016733708769107, "loss": 1.5779, "step": 230 }, { "epoch": 0.5707491082045184, "grad_norm": 1.588630199432373, "learning_rate": 0.00016572807723250202, "loss": 1.6371, "step": 240 }, { "epoch": 0.5945303210463734, "grad_norm": 1.5262938737869263, 
"learning_rate": 0.00016411906677393404, "loss": 1.6228, "step": 250 } ], "logging_steps": 10, "max_steps": 1263, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.882807873959104e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }