{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.97196261682243,
  "eval_steps": 500,
  "global_step": 240,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.12461059190031153,
      "grad_norm": 0.9156144261360168,
      "learning_rate": 0.00019250000000000002,
      "loss": 0.3465,
      "step": 10
    },
    {
      "epoch": 0.24922118380062305,
      "grad_norm": 0.8195157647132874,
      "learning_rate": 0.00018416666666666665,
      "loss": 0.329,
      "step": 20
    },
    {
      "epoch": 0.37383177570093457,
      "grad_norm": 0.6960992813110352,
      "learning_rate": 0.00017583333333333334,
      "loss": 0.2746,
      "step": 30
    },
    {
      "epoch": 0.4984423676012461,
      "grad_norm": 0.6025754809379578,
      "learning_rate": 0.0001675,
      "loss": 0.311,
      "step": 40
    },
    {
      "epoch": 0.6230529595015576,
      "grad_norm": 0.8380621075630188,
      "learning_rate": 0.00015916666666666667,
      "loss": 0.324,
      "step": 50
    },
    {
      "epoch": 0.7476635514018691,
      "grad_norm": 0.7013344764709473,
      "learning_rate": 0.00015083333333333333,
      "loss": 0.3203,
      "step": 60
    },
    {
      "epoch": 0.8722741433021807,
      "grad_norm": 0.7946021556854248,
      "learning_rate": 0.00014250000000000002,
      "loss": 0.3048,
      "step": 70
    },
    {
      "epoch": 0.9968847352024922,
      "grad_norm": 0.7311714887619019,
      "learning_rate": 0.00013416666666666666,
      "loss": 0.2809,
      "step": 80
    },
    {
      "epoch": 1.1121495327102804,
      "grad_norm": 0.8304562568664551,
      "learning_rate": 0.00012583333333333335,
      "loss": 0.2608,
      "step": 90
    },
    {
      "epoch": 1.236760124610592,
      "grad_norm": 0.6632652878761292,
      "learning_rate": 0.00011750000000000001,
      "loss": 0.2839,
      "step": 100
    },
    {
      "epoch": 1.3613707165109035,
      "grad_norm": 0.6893765330314636,
      "learning_rate": 0.00010916666666666666,
      "loss": 0.2732,
      "step": 110
    },
    {
      "epoch": 1.485981308411215,
      "grad_norm": 0.7527514100074768,
      "learning_rate": 0.00010083333333333334,
      "loss": 0.2954,
      "step": 120
    },
    {
      "epoch": 1.6105919003115265,
      "grad_norm": 0.6240414977073669,
      "learning_rate": 9.250000000000001e-05,
      "loss": 0.2624,
      "step": 130
    },
    {
      "epoch": 1.735202492211838,
      "grad_norm": 0.7276539206504822,
      "learning_rate": 8.416666666666668e-05,
      "loss": 0.2713,
      "step": 140
    },
    {
      "epoch": 1.8598130841121496,
      "grad_norm": 0.7341501712799072,
      "learning_rate": 7.583333333333334e-05,
      "loss": 0.2519,
      "step": 150
    },
    {
      "epoch": 1.9844236760124612,
      "grad_norm": 0.8342993259429932,
      "learning_rate": 6.750000000000001e-05,
      "loss": 0.2686,
      "step": 160
    },
    {
      "epoch": 2.0996884735202492,
      "grad_norm": 0.6449198126792908,
      "learning_rate": 5.916666666666667e-05,
      "loss": 0.2368,
      "step": 170
    },
    {
      "epoch": 2.2242990654205608,
      "grad_norm": 0.5292518734931946,
      "learning_rate": 5.0833333333333333e-05,
      "loss": 0.2497,
      "step": 180
    },
    {
      "epoch": 2.3489096573208723,
      "grad_norm": 0.7724623084068298,
      "learning_rate": 4.25e-05,
      "loss": 0.2412,
      "step": 190
    },
    {
      "epoch": 2.473520249221184,
      "grad_norm": 0.7042115330696106,
      "learning_rate": 3.4166666666666666e-05,
      "loss": 0.2488,
      "step": 200
    },
    {
      "epoch": 2.5981308411214954,
      "grad_norm": 0.675959050655365,
      "learning_rate": 2.5833333333333336e-05,
      "loss": 0.2772,
      "step": 210
    },
    {
      "epoch": 2.722741433021807,
      "grad_norm": 0.6327322721481323,
      "learning_rate": 1.75e-05,
      "loss": 0.2684,
      "step": 220
    },
    {
      "epoch": 2.8473520249221185,
      "grad_norm": 0.4853314757347107,
      "learning_rate": 9.166666666666666e-06,
      "loss": 0.2581,
      "step": 230
    },
    {
      "epoch": 2.97196261682243,
      "grad_norm": 0.7433005571365356,
      "learning_rate": 8.333333333333333e-07,
      "loss": 0.2624,
      "step": 240
    }
  ],
  "logging_steps": 10,
  "max_steps": 240,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 30,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 4.596380699384218e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}