{
  "best_global_step": 500,
  "best_metric": 0.8331602811813354,
  "best_model_checkpoint": "./biomistral-lora-finetuned/checkpoint-500",
  "epoch": 0.5416384563303994,
  "eval_steps": 500,
  "global_step": 500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.010832769126607989,
      "grad_norm": 0.7727395296096802,
      "learning_rate": 1.8e-05,
      "loss": 0.889,
      "step": 10
    },
    {
      "epoch": 0.021665538253215978,
      "grad_norm": 0.8008129596710205,
      "learning_rate": 3.8e-05,
      "loss": 0.8378,
      "step": 20
    },
    {
      "epoch": 0.03249830737982397,
      "grad_norm": 0.9147247076034546,
      "learning_rate": 5.8e-05,
      "loss": 0.8108,
      "step": 30
    },
    {
      "epoch": 0.043331076506431955,
      "grad_norm": 0.8121607303619385,
      "learning_rate": 7.800000000000001e-05,
      "loss": 0.8597,
      "step": 40
    },
    {
      "epoch": 0.05416384563303995,
      "grad_norm": 1.0018593072891235,
      "learning_rate": 9.8e-05,
      "loss": 0.7486,
      "step": 50
    },
    {
      "epoch": 0.06499661475964794,
      "grad_norm": 1.2048218250274658,
      "learning_rate": 0.000118,
      "loss": 0.6825,
      "step": 60
    },
    {
      "epoch": 0.07582938388625593,
      "grad_norm": 0.9863468408584595,
      "learning_rate": 0.000138,
      "loss": 0.6539,
      "step": 70
    },
    {
      "epoch": 0.08666215301286391,
      "grad_norm": 1.2911494970321655,
      "learning_rate": 0.00015800000000000002,
      "loss": 0.6198,
      "step": 80
    },
    {
      "epoch": 0.0974949221394719,
      "grad_norm": 1.159672737121582,
      "learning_rate": 0.00017800000000000002,
      "loss": 0.6222,
      "step": 90
    },
    {
      "epoch": 0.1083276912660799,
      "grad_norm": 1.0924432277679443,
      "learning_rate": 0.00019800000000000002,
      "loss": 0.5923,
      "step": 100
    },
    {
      "epoch": 0.11916046039268788,
      "grad_norm": 1.3423463106155396,
      "learning_rate": 0.00019932634730538925,
      "loss": 0.5548,
      "step": 110
    },
    {
      "epoch": 0.12999322951929587,
      "grad_norm": 1.4929102659225464,
      "learning_rate": 0.00019857784431137723,
      "loss": 0.6701,
      "step": 120
    },
    {
      "epoch": 0.14082599864590387,
      "grad_norm": 0.9462954998016357,
      "learning_rate": 0.00019782934131736527,
      "loss": 0.8675,
      "step": 130
    },
    {
      "epoch": 0.15165876777251186,
      "grad_norm": 0.9912289977073669,
      "learning_rate": 0.0001970808383233533,
      "loss": 0.9074,
      "step": 140
    },
    {
      "epoch": 0.16249153689911983,
      "grad_norm": 1.1070538759231567,
      "learning_rate": 0.00019633233532934132,
      "loss": 0.8755,
      "step": 150
    },
    {
      "epoch": 0.17332430602572782,
      "grad_norm": 0.9465340375900269,
      "learning_rate": 0.00019558383233532936,
      "loss": 0.882,
      "step": 160
    },
    {
      "epoch": 0.18415707515233581,
      "grad_norm": 0.8657329678535461,
      "learning_rate": 0.00019483532934131737,
      "loss": 0.8737,
      "step": 170
    },
    {
      "epoch": 0.1949898442789438,
      "grad_norm": 0.7293577790260315,
      "learning_rate": 0.0001940868263473054,
      "loss": 0.8473,
      "step": 180
    },
    {
      "epoch": 0.2058226134055518,
      "grad_norm": 0.849353551864624,
      "learning_rate": 0.00019333832335329343,
      "loss": 0.9414,
      "step": 190
    },
    {
      "epoch": 0.2166553825321598,
      "grad_norm": 0.7525314688682556,
      "learning_rate": 0.00019258982035928144,
      "loss": 0.8852,
      "step": 200
    },
    {
      "epoch": 0.22748815165876776,
      "grad_norm": 1.0732208490371704,
      "learning_rate": 0.00019184131736526948,
      "loss": 0.8074,
      "step": 210
    },
    {
      "epoch": 0.23832092078537576,
      "grad_norm": 0.8420374393463135,
      "learning_rate": 0.0001910928143712575,
      "loss": 0.9508,
      "step": 220
    },
    {
      "epoch": 0.24915368991198375,
      "grad_norm": 0.8308244347572327,
      "learning_rate": 0.0001903443113772455,
      "loss": 0.8734,
      "step": 230
    },
    {
      "epoch": 0.25998645903859174,
      "grad_norm": 0.9915153384208679,
      "learning_rate": 0.00018959580838323354,
      "loss": 0.8816,
      "step": 240
    },
    {
      "epoch": 0.2708192281651997,
      "grad_norm": 4.8621978759765625,
      "learning_rate": 0.00018884730538922158,
      "loss": 0.8848,
      "step": 250
    },
    {
      "epoch": 0.28165199729180773,
      "grad_norm": 0.7945590019226074,
      "learning_rate": 0.0001880988023952096,
      "loss": 0.8503,
      "step": 260
    },
    {
      "epoch": 0.2924847664184157,
      "grad_norm": 0.7896672487258911,
      "learning_rate": 0.00018735029940119763,
      "loss": 0.8798,
      "step": 270
    },
    {
      "epoch": 0.3033175355450237,
      "grad_norm": 0.8870701789855957,
      "learning_rate": 0.00018660179640718564,
      "loss": 0.9112,
      "step": 280
    },
    {
      "epoch": 0.3141503046716317,
      "grad_norm": 0.9003740549087524,
      "learning_rate": 0.00018585329341317365,
      "loss": 0.846,
      "step": 290
    },
    {
      "epoch": 0.32498307379823965,
      "grad_norm": 0.7067676186561584,
      "learning_rate": 0.0001851047904191617,
      "loss": 0.8588,
      "step": 300
    },
    {
      "epoch": 0.3358158429248477,
      "grad_norm": 0.9696246385574341,
      "learning_rate": 0.0001843562874251497,
      "loss": 0.8244,
      "step": 310
    },
    {
      "epoch": 0.34664861205145564,
      "grad_norm": 0.9892609715461731,
      "learning_rate": 0.00018360778443113774,
      "loss": 0.8214,
      "step": 320
    },
    {
      "epoch": 0.35748138117806366,
      "grad_norm": 0.822260856628418,
      "learning_rate": 0.00018285928143712575,
      "loss": 0.7977,
      "step": 330
    },
    {
      "epoch": 0.36831415030467163,
      "grad_norm": 0.7743964791297913,
      "learning_rate": 0.00018211077844311376,
      "loss": 0.8002,
      "step": 340
    },
    {
      "epoch": 0.3791469194312796,
      "grad_norm": 0.7090775370597839,
      "learning_rate": 0.0001813622754491018,
      "loss": 0.8192,
      "step": 350
    },
    {
      "epoch": 0.3899796885578876,
      "grad_norm": 1.0970802307128906,
      "learning_rate": 0.00018061377245508984,
      "loss": 0.8516,
      "step": 360
    },
    {
      "epoch": 0.4008124576844956,
      "grad_norm": 0.9633163213729858,
      "learning_rate": 0.00017986526946107785,
      "loss": 0.8414,
      "step": 370
    },
    {
      "epoch": 0.4116452268111036,
      "grad_norm": 0.6846926808357239,
      "learning_rate": 0.00017911676646706587,
      "loss": 0.8187,
      "step": 380
    },
    {
      "epoch": 0.42247799593771157,
      "grad_norm": 0.7262110710144043,
      "learning_rate": 0.0001783682634730539,
      "loss": 0.8572,
      "step": 390
    },
    {
      "epoch": 0.4333107650643196,
      "grad_norm": 0.8537372350692749,
      "learning_rate": 0.00017761976047904192,
      "loss": 0.8286,
      "step": 400
    },
    {
      "epoch": 0.44414353419092756,
      "grad_norm": 0.8860271573066711,
      "learning_rate": 0.00017687125748502996,
      "loss": 0.8416,
      "step": 410
    },
    {
      "epoch": 0.4549763033175355,
      "grad_norm": 0.7984218597412109,
      "learning_rate": 0.000176122754491018,
      "loss": 0.8373,
      "step": 420
    },
    {
      "epoch": 0.46580907244414355,
      "grad_norm": 0.8060943484306335,
      "learning_rate": 0.000175374251497006,
      "loss": 0.9165,
      "step": 430
    },
    {
      "epoch": 0.4766418415707515,
      "grad_norm": 0.7871391177177429,
      "learning_rate": 0.00017462574850299402,
      "loss": 0.8276,
      "step": 440
    },
    {
      "epoch": 0.48747461069735953,
      "grad_norm": 0.7732688784599304,
      "learning_rate": 0.00017387724550898203,
      "loss": 0.8346,
      "step": 450
    },
    {
      "epoch": 0.4983073798239675,
      "grad_norm": 0.9314000606536865,
      "learning_rate": 0.00017312874251497007,
      "loss": 0.8291,
      "step": 460
    },
    {
      "epoch": 0.5091401489505755,
      "grad_norm": 0.6721988916397095,
      "learning_rate": 0.0001723802395209581,
      "loss": 0.7091,
      "step": 470
    },
    {
      "epoch": 0.5199729180771835,
      "grad_norm": 0.825965940952301,
      "learning_rate": 0.00017163173652694612,
      "loss": 0.8934,
      "step": 480
    },
    {
      "epoch": 0.5308056872037915,
      "grad_norm": 0.8427668213844299,
      "learning_rate": 0.00017088323353293413,
      "loss": 0.7603,
      "step": 490
    },
    {
      "epoch": 0.5416384563303994,
      "grad_norm": 1.0061259269714355,
      "learning_rate": 0.00017013473053892217,
      "loss": 0.8277,
      "step": 500
    },
    {
      "epoch": 0.5416384563303994,
      "eval_loss": 0.8331602811813354,
      "eval_runtime": 355.9061,
      "eval_samples_per_second": 4.614,
      "eval_steps_per_second": 2.307,
      "step": 500
    }
  ],
  "logging_steps": 10,
  "max_steps": 2772,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.76837068918358e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}