{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.23494860499265785,
  "eval_steps": 500,
  "global_step": 200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.011747430249632892,
      "grad_norm": 1.5699902772903442,
      "learning_rate": 0.00027,
      "loss": 3.0983,
      "step": 10
    },
    {
      "epoch": 0.023494860499265784,
      "grad_norm": 1.6029695272445679,
      "learning_rate": 0.00029991523567092526,
      "loss": 2.062,
      "step": 20
    },
    {
      "epoch": 0.03524229074889868,
      "grad_norm": 1.593436360359192,
      "learning_rate": 0.00029962234616583063,
      "loss": 1.2074,
      "step": 30
    },
    {
      "epoch": 0.04698972099853157,
      "grad_norm": 0.5851414799690247,
      "learning_rate": 0.00029912069357315393,
      "loss": 0.888,
      "step": 40
    },
    {
      "epoch": 0.05873715124816446,
      "grad_norm": 0.25992292165756226,
      "learning_rate": 0.0002984109778320875,
      "loss": 0.7685,
      "step": 50
    },
    {
      "epoch": 0.07048458149779736,
      "grad_norm": 0.21082307398319244,
      "learning_rate": 0.00029749418918542057,
      "loss": 0.7096,
      "step": 60
    },
    {
      "epoch": 0.08223201174743025,
      "grad_norm": 0.16843102872371674,
      "learning_rate": 0.0002963716067978866,
      "loss": 0.6901,
      "step": 70
    },
    {
      "epoch": 0.09397944199706314,
      "grad_norm": 0.12076722830533981,
      "learning_rate": 0.000295044796971387,
      "loss": 0.6702,
      "step": 80
    },
    {
      "epoch": 0.10572687224669604,
      "grad_norm": 0.21371866762638092,
      "learning_rate": 0.000293515610959582,
      "loss": 0.6353,
      "step": 90
    },
    {
      "epoch": 0.11747430249632893,
      "grad_norm": 0.13458965718746185,
      "learning_rate": 0.0002917861823848985,
      "loss": 0.6479,
      "step": 100
    },
    {
      "epoch": 0.12922173274596183,
      "grad_norm": 0.265765517950058,
      "learning_rate": 0.0002898589242615568,
      "loss": 0.6244,
      "step": 110
    },
    {
      "epoch": 0.14096916299559473,
      "grad_norm": 0.1473032385110855,
      "learning_rate": 0.0002877365256287728,
      "loss": 0.6217,
      "step": 120
    },
    {
      "epoch": 0.1527165932452276,
      "grad_norm": 0.1591167151927948,
      "learning_rate": 0.00028542194779883047,
      "loss": 0.6022,
      "step": 130
    },
    {
      "epoch": 0.1644640234948605,
      "grad_norm": 0.13270772993564606,
      "learning_rate": 0.00028291842022526133,
      "loss": 0.6098,
      "step": 140
    },
    {
      "epoch": 0.1762114537444934,
      "grad_norm": 0.1444919854402542,
      "learning_rate": 0.0002802294359968954,
      "loss": 0.5971,
      "step": 150
    },
    {
      "epoch": 0.18795888399412627,
      "grad_norm": 0.1571902334690094,
      "learning_rate": 0.0002773587469640702,
      "loss": 0.5937,
      "step": 160
    },
    {
      "epoch": 0.19970631424375918,
      "grad_norm": 0.11585285514593124,
      "learning_rate": 0.0002743103585037989,
      "loss": 0.6054,
      "step": 170
    },
    {
      "epoch": 0.21145374449339208,
      "grad_norm": 0.10303252190351486,
      "learning_rate": 0.0002710885239312008,
      "loss": 0.5708,
      "step": 180
    },
    {
      "epoch": 0.22320117474302498,
      "grad_norm": 0.09355439245700836,
      "learning_rate": 0.00026769773856499167,
      "loss": 0.5806,
      "step": 190
    },
    {
      "epoch": 0.23494860499265785,
      "grad_norm": 0.09288550913333893,
      "learning_rate": 0.0002641427334553158,
      "loss": 0.5747,
      "step": 200
    }
  ],
  "logging_steps": 10,
  "max_steps": 851,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 40,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 4.2381453081706496e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}