diff --git a/README.md b/README.md
index d94fa0fb36d5aa76962cf3fda3ca0bfe4c2fa517..2d1596ffe16e4d5bdcdf0e1d4322e6667af95962 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 ---
-base_model: TheBloke/Llama-2-7B-fp16
 library_name: peft
+base_model: TheBloke/Llama-2-7B-fp16
 ---
 
 # Model Card for Model ID
diff --git a/adapter_config.json b/adapter_config.json
index 923a080092a0575b2d3304503832fcf7c77462f4..cbf93f2809e43fe18fd6ad23406293a68e7f5c98 100644
--- a/adapter_config.json
+++ b/adapter_config.json
@@ -20,10 +20,10 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "k_proj",
     "v_proj",
-    "q_proj",
     "o_proj",
-    "k_proj"
+    "q_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
diff --git a/adapter_model.safetensors b/adapter_model.safetensors
index 06513132c03fe764076772f7ca590751400c2853..b4f9fb90e930a7899b68e1787dce408f742c6dde 100644
--- a/adapter_model.safetensors
+++ b/adapter_model.safetensors
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:06e781f63931f2a767880f6d4b91f140eb5043f4a642a1a68d8adbb81bbcae9d
+oid sha256:58a8f71955b57e8e1082d6f6f180646592764af968e225bda2b732733e64a34c
 size 67143296
diff --git a/checkpoint-10/README.md b/checkpoint-10/README.md
index d94fa0fb36d5aa76962cf3fda3ca0bfe4c2fa517..2d1596ffe16e4d5bdcdf0e1d4322e6667af95962 100644
--- a/checkpoint-10/README.md
+++ b/checkpoint-10/README.md
@@ -1,6 +1,6 @@
 ---
-base_model: TheBloke/Llama-2-7B-fp16
 library_name: peft
+base_model: TheBloke/Llama-2-7B-fp16
 ---
 
 # Model Card for Model ID
diff --git a/checkpoint-10/adapter_config.json b/checkpoint-10/adapter_config.json
index 923a080092a0575b2d3304503832fcf7c77462f4..cbf93f2809e43fe18fd6ad23406293a68e7f5c98 100644
--- a/checkpoint-10/adapter_config.json
+++ b/checkpoint-10/adapter_config.json
@@ -20,10 +20,10 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "k_proj",
     "v_proj",
-    "q_proj",
     "o_proj",
-    "k_proj"
+    "q_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
diff --git a/checkpoint-10/adapter_model.safetensors b/checkpoint-10/adapter_model.safetensors
index 3602a4d36d9e2b76fde36e4378e7ca0bb641aa41..5c2ebe7a9d782b318f37eeb0942020de6ef351c9 100644
--- a/checkpoint-10/adapter_model.safetensors
+++ b/checkpoint-10/adapter_model.safetensors
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:83bdb12be1e8a3735b34051dd69cbe3689aa855355eecde29c57851904d81ecf
+oid sha256:232e478a97bd908880b31806be86d4debfd0492ee0eb83ba509e3293c6c3565a
 size 67143296
diff --git a/checkpoint-10/optimizer.pt b/checkpoint-10/optimizer.pt
index fc8e9df2aaea2f8b4c4b743a6915e923c7f3e01a..f6aa09b5a5c07210fa7f9655cd844f149c370ddd 100644
--- a/checkpoint-10/optimizer.pt
+++ b/checkpoint-10/optimizer.pt
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6a7148ac4a39214d064ffce80fb50e3dd973562f331727ef157843d938bed4c7
+oid sha256:40afa20170f291b125eb81d0ffabecaf09477598d9e256ea01ff0b97e18e70a9
 size 134433530
diff --git a/checkpoint-10/scheduler.pt b/checkpoint-10/scheduler.pt
index 3f66941942fe4ae2d6b0f5794daec11d0b26e37b..941ed765630f615d588fd402f4d9cc96b1946236 100644
--- a/checkpoint-10/scheduler.pt
+++ b/checkpoint-10/scheduler.pt
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:657ce80fb8c4abe8e223e56f6ece2f925cddc4720e917ac9a1e7ce94ab2749a8
+oid sha256:a35af14c8f81087292db706fc180cce03fb0f692ed151b0664293b217dd11fa5
 size 1064
diff --git a/checkpoint-10/trainer_state.json b/checkpoint-10/trainer_state.json
index 4c77a4bc2a30fa60428c0e5ce619ff776b69241c..5e5b97c2c1c2cda602de8084d07f4f9e745d299b 100644
--- a/checkpoint-10/trainer_state.json
+++ b/checkpoint-10/trainer_state.json
@@ -1,5 +1,5 @@
 {
-  "best_metric": 1.769914984703064,
+  "best_metric": 1.733155369758606,
   "best_model_checkpoint": "/kaggle/working/checkpoint-10",
   "epoch": 1.1111111111111112,
   "eval_steps": 10,
@@ -10,24 +10,24 @@
   "log_history": [
     {
       "epoch": 1.1111111111111112,
-      "grad_norm": 0.022207504138350487,
-      "learning_rate": 0.00012592592592592592,
-      "loss": 2.058,
+      "grad_norm": 0.022282764315605164,
+      "learning_rate": 0.0001851851851851852,
+      "loss": 2.0424,
       "step": 10
     },
     {
       "epoch": 1.1111111111111112,
-      "eval_loss": 1.769914984703064,
-      "eval_runtime": 34.8115,
-      "eval_samples_per_second": 1.034,
-      "eval_steps_per_second": 0.144,
+      "eval_loss": 1.733155369758606,
+      "eval_runtime": 34.5543,
+      "eval_samples_per_second": 1.042,
+      "eval_steps_per_second": 0.145,
       "step": 10
     }
   ],
   "logging_steps": 10,
-  "max_steps": 27,
+  "max_steps": 135,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 3,
+  "num_train_epochs": 15,
   "save_steps": 10,
   "stateful_callbacks": {
     "EarlyStoppingCallback": {
diff --git a/checkpoint-10/training_args.bin b/checkpoint-10/training_args.bin
index ac046aa3239775eec549604f7b57242cef974c25..db8c5d32a4f54d0e9ab4e7a985fbaee5d6701ecc 100644
--- a/checkpoint-10/training_args.bin
+++ b/checkpoint-10/training_args.bin
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6729829c35b3a85ce1b9965237f3a3b4ff249959cfad240042e0219e283812ed
+oid sha256:79c753ff1ba946038f620bad3e42a35ce583c9e8ed52b49fd22fb6614fea0f43
 size 5112
diff --git a/checkpoint-100/README.md b/checkpoint-100/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..2d1596ffe16e4d5bdcdf0e1d4322e6667af95962
--- /dev/null
+++ b/checkpoint-100/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: TheBloke/Llama-2-7B-fp16
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.12.0
\ No newline at end of file
diff --git a/checkpoint-100/adapter_config.json b/checkpoint-100/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..cbf93f2809e43fe18fd6ad23406293a68e7f5c98
--- /dev/null
+++ b/checkpoint-100/adapter_config.json
@@ -0,0 +1,31 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "TheBloke/Llama-2-7B-fp16",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "k_proj",
+    "v_proj",
+    "o_proj",
+    "q_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-100/adapter_model.safetensors b/checkpoint-100/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5474456dc9191482da68a63e25fc4b57741ddb66
--- /dev/null
+++ b/checkpoint-100/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ac3b59826a91a331332b5850491ffec38f48afde058dead68205fb9903924aac
+size 67143296
diff --git a/checkpoint-100/optimizer.pt b/checkpoint-100/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ed26cd0b52ff12b00beef892c62747154ff73280
--- /dev/null
+++ b/checkpoint-100/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:49ff3c81928bee92eb73c79e2f6088612cc35f2fab427d0f73ba21269e3c8085
+size 134433530
diff --git a/checkpoint-100/rng_state.pth b/checkpoint-100/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..bda67fd2eea9e9037427984468dca53fb7064feb
--- /dev/null
+++ b/checkpoint-100/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ad9fcf2e86ce0f113dac0c406c0b49ce72f5891aaa3942215cde240415672553
+size 14244
diff --git a/checkpoint-100/scheduler.pt b/checkpoint-100/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4edc88d9af1e5877cdf4a912cba5fd5dd30760f3
--- /dev/null
+++ b/checkpoint-100/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a67071c9831c7625e547eac0c0538006ee7fe06d1b1052844fd1cdb5172b8b9f
+size 1064
diff --git a/checkpoint-100/trainer_state.json b/checkpoint-100/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..7c2f7daa1437992d332458338001a59717ed5348
--- /dev/null
+++ b/checkpoint-100/trainer_state.json
@@ -0,0 +1,192 @@
+{
+  "best_metric": 1.2115424871444702,
+  "best_model_checkpoint": "/kaggle/working/checkpoint-90",
+  "epoch": 11.11111111111111,
+  "eval_steps": 10,
+  "global_step": 100,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.1111111111111112,
+      "grad_norm": 0.022282764315605164,
+      "learning_rate": 0.0001851851851851852,
+      "loss": 2.0424,
+      "step": 10
+    },
+    {
+      "epoch": 1.1111111111111112,
+      "eval_loss": 1.733155369758606,
+      "eval_runtime": 34.5543,
+      "eval_samples_per_second": 1.042,
+      "eval_steps_per_second": 0.145,
+      "step": 10
+    },
+    {
+      "epoch": 2.2222222222222223,
+      "grad_norm": 0.018981408327817917,
+      "learning_rate": 0.00017037037037037037,
+      "loss": 1.6072,
+      "step": 20
+    },
+    {
+      "epoch": 2.2222222222222223,
+      "eval_loss": 1.5428930521011353,
+      "eval_runtime": 34.6485,
+      "eval_samples_per_second": 1.039,
+      "eval_steps_per_second": 0.144,
+      "step": 20
+    },
+    {
+      "epoch": 3.3333333333333335,
+      "grad_norm": 0.023157037794589996,
+      "learning_rate": 0.00015555555555555556,
+      "loss": 1.4025,
+      "step": 30
+    },
+    {
+      "epoch": 3.3333333333333335,
+      "eval_loss": 1.4176721572875977,
+      "eval_runtime": 34.5433,
+      "eval_samples_per_second": 1.042,
+      "eval_steps_per_second": 0.145,
+      "step": 30
+    },
+    {
+      "epoch": 4.444444444444445,
+      "grad_norm": 0.021338749676942825,
+      "learning_rate": 0.00014074074074074076,
+      "loss": 1.285,
+      "step": 40
+    },
+    {
+      "epoch": 4.444444444444445,
+      "eval_loss": 1.3449772596359253,
+      "eval_runtime": 34.5594,
+      "eval_samples_per_second": 1.042,
+      "eval_steps_per_second": 0.145,
+      "step": 40
+    },
+    {
+      "epoch": 5.555555555555555,
+      "grad_norm": 0.02489505708217621,
+      "learning_rate": 0.00012592592592592592,
+      "loss": 1.1687,
+      "step": 50
+    },
+    {
+      "epoch": 5.555555555555555,
+      "eval_loss": 1.2951068878173828,
+      "eval_runtime": 34.5896,
+      "eval_samples_per_second": 1.041,
+      "eval_steps_per_second": 0.145,
+      "step": 50
+    },
+    {
+      "epoch": 6.666666666666667,
+      "grad_norm": 0.028962766751646996,
+      "learning_rate": 0.00011111111111111112,
+      "loss": 1.0521,
+      "step": 60
+    },
+    {
+      "epoch": 6.666666666666667,
+      "eval_loss": 1.2674343585968018,
+      "eval_runtime": 34.5586,
+      "eval_samples_per_second": 1.042,
+      "eval_steps_per_second": 0.145,
+      "step": 60
+    },
+    {
+      "epoch": 7.777777777777778,
+      "grad_norm": 0.033917125314474106,
+      "learning_rate": 9.62962962962963e-05,
+      "loss": 0.9885,
+      "step": 70
+    },
+    {
+      "epoch": 7.777777777777778,
+      "eval_loss": 1.2424466609954834,
+      "eval_runtime": 34.5412,
+      "eval_samples_per_second": 1.042,
+      "eval_steps_per_second": 0.145,
+      "step": 70
+    },
+    {
+      "epoch": 8.88888888888889,
+      "grad_norm": 0.03393130004405975,
+      "learning_rate": 8.148148148148148e-05,
+      "loss": 0.8784,
+      "step": 80
+    },
+    {
+      "epoch": 8.88888888888889,
+      "eval_loss": 1.2252851724624634,
+      "eval_runtime": 34.58,
+      "eval_samples_per_second": 1.041,
+      "eval_steps_per_second": 0.145,
+      "step": 80
+    },
+    {
+      "epoch": 10.0,
+      "grad_norm": 0.04081139340996742,
+      "learning_rate": 6.666666666666667e-05,
+      "loss": 0.8154,
+      "step": 90
+    },
+    {
+      "epoch": 10.0,
+      "eval_loss": 1.2115424871444702,
+      "eval_runtime": 34.5784,
+      "eval_samples_per_second": 1.041,
+      "eval_steps_per_second": 0.145,
+      "step": 90
+    },
+    {
+      "epoch": 11.11111111111111,
+      "grad_norm": 0.04114004969596863,
+      "learning_rate": 5.185185185185185e-05,
+      "loss": 0.7376,
+      "step": 100
+    },
+    {
+      "epoch": 11.11111111111111,
+      "eval_loss": 1.2147088050842285,
+      "eval_runtime": 34.595,
+      "eval_samples_per_second": 1.041,
+      "eval_steps_per_second": 0.145,
+      "step": 100
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 135,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 15,
+  "save_steps": 10,
+  "stateful_callbacks": {
+    "EarlyStoppingCallback": {
+      "args": {
+        "early_stopping_patience": 3,
+        "early_stopping_threshold": 0.0
+      },
+      "attributes": {
+        "early_stopping_patience_counter": 0
+      }
+    },
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2.6718311886422016e+16,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-100/training_args.bin b/checkpoint-100/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..db8c5d32a4f54d0e9ab4e7a985fbaee5d6701ecc
--- /dev/null
+++ b/checkpoint-100/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:79c753ff1ba946038f620bad3e42a35ce583c9e8ed52b49fd22fb6614fea0f43
+size 5112
diff --git a/checkpoint-110/README.md b/checkpoint-110/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..2d1596ffe16e4d5bdcdf0e1d4322e6667af95962
--- /dev/null
+++ b/checkpoint-110/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: TheBloke/Llama-2-7B-fp16
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.12.0
\ No newline at end of file
diff --git a/checkpoint-110/adapter_config.json b/checkpoint-110/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..cbf93f2809e43fe18fd6ad23406293a68e7f5c98
--- /dev/null
+++ b/checkpoint-110/adapter_config.json
@@ -0,0 +1,31 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "TheBloke/Llama-2-7B-fp16",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "k_proj",
+    "v_proj",
+    "o_proj",
+    "q_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-110/adapter_model.safetensors b/checkpoint-110/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7f605b66a87175ab6628e4ed30bb65e58cfd30b5
--- /dev/null
+++ b/checkpoint-110/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:be7b95474c7a25c6961db7fe4913e88e0e78819b321a21a383b179782c22ef6c
+size 67143296
diff --git a/checkpoint-110/optimizer.pt b/checkpoint-110/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..128e5885bb3a9ff0f3a9e5377049be1f310e384b
--- /dev/null
+++ b/checkpoint-110/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:567bf2cdba466fb3da7301567fc6eee0fd77e99808ab9402911468a94017eb0a
+size 134433530
diff --git a/checkpoint-110/rng_state.pth b/checkpoint-110/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..c7d7b7fbbf5033245a0164d48dee7aac7380c478
--- /dev/null
+++ b/checkpoint-110/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b74e65bff4ecaf37ea6199e51bc5b668d4b069c4bc042e44eeac5d4c4ffef401
+size 14244
diff --git a/checkpoint-110/scheduler.pt b/checkpoint-110/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..70cd77658255ba7e2dd9a0fdcb8ac3766d0b1df3
--- /dev/null
+++ b/checkpoint-110/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3c7403172deaf546b51410d54cab636a5c53264be17aa6a439e5934523944587
+size 1064
diff --git a/checkpoint-110/trainer_state.json b/checkpoint-110/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..3cfcc9fcbe02f92fe34d4c23ddc019a88d9a5400
--- /dev/null
+++ b/checkpoint-110/trainer_state.json
@@ -0,0 +1,207 @@
+{
+  "best_metric": 1.2115424871444702,
+  "best_model_checkpoint": "/kaggle/working/checkpoint-90",
+  "epoch": 12.222222222222221,
+  "eval_steps": 10,
+  "global_step": 110,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.1111111111111112,
+      "grad_norm": 0.022282764315605164,
+      "learning_rate": 0.0001851851851851852,
+      "loss": 2.0424,
+      "step": 10
+    },
+    {
+      "epoch": 1.1111111111111112,
+      "eval_loss": 1.733155369758606,
+      "eval_runtime": 34.5543,
+      "eval_samples_per_second": 1.042,
+      "eval_steps_per_second": 0.145,
+      "step": 10
+    },
+    {
+      "epoch": 2.2222222222222223,
+      "grad_norm": 0.018981408327817917,
+      "learning_rate": 0.00017037037037037037,
+      "loss": 1.6072,
+      "step": 20
+    },
+    {
+      "epoch": 2.2222222222222223,
+      "eval_loss": 1.5428930521011353,
+      "eval_runtime": 34.6485,
+      "eval_samples_per_second": 1.039,
+      "eval_steps_per_second": 0.144,
+      "step": 20
+    },
+    {
+      "epoch": 3.3333333333333335,
+      "grad_norm": 0.023157037794589996,
+      "learning_rate": 0.00015555555555555556,
+      "loss": 1.4025,
+      "step": 30
+    },
+    {
+      "epoch": 3.3333333333333335,
+      "eval_loss": 1.4176721572875977,
+      "eval_runtime": 34.5433,
+      "eval_samples_per_second": 1.042,
+      "eval_steps_per_second": 0.145,
+      "step": 30
+    },
+    {
+      "epoch": 4.444444444444445,
+      "grad_norm": 0.021338749676942825,
+      "learning_rate": 0.00014074074074074076,
+      "loss": 1.285,
+      "step": 40
+    },
+    {
+      "epoch": 4.444444444444445,
+      "eval_loss": 1.3449772596359253,
+      "eval_runtime": 34.5594,
+      "eval_samples_per_second": 1.042,
+      "eval_steps_per_second": 0.145,
+      "step": 40
+    },
+    {
+      "epoch": 5.555555555555555,
+      "grad_norm": 0.02489505708217621,
+      "learning_rate": 0.00012592592592592592,
+      "loss": 1.1687,
+      "step": 50
+    },
+    {
+      "epoch": 5.555555555555555,
+      "eval_loss": 1.2951068878173828,
+      "eval_runtime": 34.5896,
+      "eval_samples_per_second": 1.041,
+      "eval_steps_per_second": 0.145,
+      "step": 50
+    },
+    {
+      "epoch": 6.666666666666667,
+      "grad_norm": 0.028962766751646996,
+      "learning_rate": 0.00011111111111111112,
+      "loss": 1.0521,
+      "step": 60
+    },
+    {
+      "epoch": 6.666666666666667,
+      "eval_loss": 1.2674343585968018,
+      "eval_runtime": 34.5586,
+      "eval_samples_per_second": 1.042,
+      "eval_steps_per_second": 0.145,
+      "step": 60
+    },
+    {
+      "epoch": 7.777777777777778,
+      "grad_norm": 0.033917125314474106,
+      "learning_rate": 9.62962962962963e-05,
+      "loss": 0.9885,
+      "step": 70
+    },
+    {
+      "epoch": 7.777777777777778,
+      "eval_loss": 1.2424466609954834,
+      "eval_runtime": 34.5412,
+      "eval_samples_per_second": 1.042,
+      "eval_steps_per_second": 0.145,
+      "step": 70
+    },
+    {
+      "epoch": 8.88888888888889,
+      "grad_norm": 0.03393130004405975,
+      "learning_rate": 8.148148148148148e-05,
+      "loss": 0.8784,
+      "step": 80
+    },
+    {
+      "epoch": 8.88888888888889,
+      "eval_loss": 1.2252851724624634,
+      "eval_runtime": 34.58,
+      "eval_samples_per_second": 1.041,
+      "eval_steps_per_second": 0.145,
+      "step": 80
+    },
+    {
+      "epoch": 10.0,
+      "grad_norm": 0.04081139340996742,
+      "learning_rate": 6.666666666666667e-05,
+      "loss": 0.8154,
+      "step": 90
+    },
+    {
+      "epoch": 10.0,
+      "eval_loss": 1.2115424871444702,
+      "eval_runtime": 34.5784,
+      "eval_samples_per_second": 1.041,
+      "eval_steps_per_second": 0.145,
+      "step": 90
+    },
+    {
+      "epoch": 11.11111111111111,
+      "grad_norm": 0.04114004969596863,
+      "learning_rate": 5.185185185185185e-05,
+      "loss": 0.7376,
+      "step": 100
+    },
+    {
+      "epoch": 11.11111111111111,
+      "eval_loss": 1.2147088050842285,
+      "eval_runtime": 34.595,
+      "eval_samples_per_second": 1.041,
+      "eval_steps_per_second": 0.145,
+      "step": 100
+    },
+    {
+      "epoch": 12.222222222222221,
+      "grad_norm": 0.04217207431793213,
+      "learning_rate": 3.7037037037037037e-05,
+      "loss": 0.6642,
+      "step": 110
+    },
+    {
+      "epoch": 12.222222222222221,
+      "eval_loss": 1.2141155004501343,
+      "eval_runtime": 34.6053,
+      "eval_samples_per_second": 1.04,
+      "eval_steps_per_second": 0.144,
+      "step": 110
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 135,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 15,
+  "save_steps": 10,
+  "stateful_callbacks": {
+    "EarlyStoppingCallback": {
+      "args": {
+        "early_stopping_patience": 3,
+        "early_stopping_threshold": 0.0
+      },
+      "attributes": {
+        "early_stopping_patience_counter": 0
+      }
+    },
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2.93951986040832e+16,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-110/training_args.bin b/checkpoint-110/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..db8c5d32a4f54d0e9ab4e7a985fbaee5d6701ecc
--- /dev/null
+++ b/checkpoint-110/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:79c753ff1ba946038f620bad3e42a35ce583c9e8ed52b49fd22fb6614fea0f43
+size 5112
diff --git a/checkpoint-120/README.md b/checkpoint-120/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..2d1596ffe16e4d5bdcdf0e1d4322e6667af95962
--- /dev/null
+++ b/checkpoint-120/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: TheBloke/Llama-2-7B-fp16
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.12.0
\ No newline at end of file
diff --git a/checkpoint-120/adapter_config.json b/checkpoint-120/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..cbf93f2809e43fe18fd6ad23406293a68e7f5c98
--- /dev/null
+++ b/checkpoint-120/adapter_config.json
@@ -0,0 +1,31 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "TheBloke/Llama-2-7B-fp16",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "k_proj",
+    "v_proj",
+    "o_proj",
+    "q_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-120/adapter_model.safetensors b/checkpoint-120/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b0331d3326f9fe8e6101bed734cda4120704beb5
--- /dev/null
+++ b/checkpoint-120/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eb1690155a5e4a4452a0e292d686003b08ac457e37c20099c948aae55ca8e453
+size 67143296
diff --git a/checkpoint-120/optimizer.pt b/checkpoint-120/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..92ef0684e214370f01a6b0138c929044159067b4
--- /dev/null
+++ b/checkpoint-120/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:14cc791678f8ce58b5f8b40f05113973aa7f94325c356b74d4df44ff8c1a956a
+size 134433530
diff --git a/checkpoint-120/rng_state.pth b/checkpoint-120/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..89a481768021c3826e125da45856bda83012f974
--- /dev/null
+++ b/checkpoint-120/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:899181c47750d0177e5f1571bc0ad909b3a3ebab597f384c5023f1d8dcb8a73a
+size 14244
diff --git a/checkpoint-120/scheduler.pt b/checkpoint-120/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1d230859237cac17ad8b06f8e53289cfacddd780
--- /dev/null
+++ b/checkpoint-120/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f7846da062f26e398f1065295f9c7cbaf4768d3aa6b5518863ce89b7eb9d328e
+size 1064
diff --git a/checkpoint-120/trainer_state.json b/checkpoint-120/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..1d0d62df39a8ef027f9eb6cb16e365592f41f1b1
--- /dev/null
+++ b/checkpoint-120/trainer_state.json
@@ -0,0 +1,222 @@
+{
+  "best_metric": 1.20501708984375,
+  "best_model_checkpoint": "/kaggle/working/checkpoint-120",
+  "epoch": 13.333333333333334,
+  "eval_steps": 10,
+  "global_step": 120,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.1111111111111112,
+      "grad_norm": 0.022282764315605164,
+      "learning_rate": 0.0001851851851851852,
+      "loss": 2.0424,
+      "step": 10
+    },
+    {
+      "epoch": 1.1111111111111112,
+      "eval_loss": 1.733155369758606,
+      "eval_runtime": 34.5543,
+      "eval_samples_per_second": 1.042,
+      "eval_steps_per_second": 0.145,
+      "step": 10
+    },
+    {
+      "epoch": 2.2222222222222223,
+      "grad_norm": 0.018981408327817917,
+      "learning_rate": 0.00017037037037037037,
+      "loss": 1.6072,
+      "step": 20
+    },
+    {
+      "epoch": 2.2222222222222223,
+      "eval_loss": 1.5428930521011353,
+      "eval_runtime": 34.6485,
+      "eval_samples_per_second": 1.039,
+      "eval_steps_per_second": 0.144,
+      "step": 20
+    },
+    {
+      "epoch": 3.3333333333333335,
+      "grad_norm": 0.023157037794589996,
+      "learning_rate": 0.00015555555555555556,
+      "loss": 1.4025,
+      "step": 30
+    },
+    {
+      "epoch": 3.3333333333333335,
+      "eval_loss": 1.4176721572875977,
+      "eval_runtime": 34.5433,
+      "eval_samples_per_second": 1.042,
+      "eval_steps_per_second": 0.145,
+      "step": 30
+    },
+    {
+      "epoch": 4.444444444444445,
+      "grad_norm": 0.021338749676942825,
+      "learning_rate": 0.00014074074074074076,
+      "loss": 1.285,
+      "step": 40
+    },
+    {
+      "epoch": 4.444444444444445,
+      "eval_loss": 1.3449772596359253,
+      "eval_runtime": 34.5594,
+      "eval_samples_per_second": 1.042,
+      "eval_steps_per_second": 0.145,
+      "step": 40
+    },
+    {
+      "epoch": 5.555555555555555,
+      "grad_norm": 0.02489505708217621,
+      "learning_rate": 0.00012592592592592592,
+      "loss": 1.1687,
+      "step": 50
+    },
+    {
+      "epoch": 5.555555555555555,
+      "eval_loss": 1.2951068878173828,
+      "eval_runtime": 34.5896,
+      "eval_samples_per_second": 1.041,
+      "eval_steps_per_second": 0.145,
+      "step": 50
+    },
+    {
+      "epoch": 6.666666666666667,
+      "grad_norm": 0.028962766751646996,
+      "learning_rate": 0.00011111111111111112,
+      "loss": 1.0521,
+      "step": 60
+    },
+    {
+      "epoch": 6.666666666666667,
+      "eval_loss": 1.2674343585968018,
+      "eval_runtime": 34.5586,
+      "eval_samples_per_second": 1.042,
+      "eval_steps_per_second": 0.145,
+      "step": 60
+    },
+    {
+      "epoch": 7.777777777777778,
+      "grad_norm": 0.033917125314474106,
+      "learning_rate": 9.62962962962963e-05,
+      "loss": 0.9885,
+      "step": 70
+    },
+    {
+      "epoch": 7.777777777777778,
+      "eval_loss": 1.2424466609954834,
+      "eval_runtime": 34.5412,
+      "eval_samples_per_second": 1.042,
+      "eval_steps_per_second": 0.145,
+      "step": 70
+    },
+    {
+      "epoch": 8.88888888888889,
+      "grad_norm": 0.03393130004405975,
+      "learning_rate": 8.148148148148148e-05,
+      "loss": 0.8784,
+      "step": 80
+    },
+    {
+      "epoch": 8.88888888888889,
+      "eval_loss": 1.2252851724624634,
+      "eval_runtime": 34.58,
+      "eval_samples_per_second": 1.041,
+      "eval_steps_per_second": 0.145,
+      "step": 80
+    },
+    {
+      "epoch": 10.0,
+      "grad_norm": 0.04081139340996742,
+      "learning_rate": 6.666666666666667e-05,
+      "loss": 0.8154,
+      "step": 90
+    },
+    {
+      "epoch": 10.0,
+      "eval_loss": 1.2115424871444702,
+      "eval_runtime": 34.5784,
+      "eval_samples_per_second": 1.041,
+      "eval_steps_per_second": 0.145,
+      "step": 90
+    },
+    {
+      "epoch": 11.11111111111111,
+      "grad_norm": 0.04114004969596863,
+      "learning_rate": 5.185185185185185e-05,
+      "loss": 0.7376,
+      "step": 100
+    },
+    {
+      "epoch": 11.11111111111111,
+      "eval_loss": 1.2147088050842285,
+      "eval_runtime": 34.595,
+      "eval_samples_per_second": 1.041,
+      "eval_steps_per_second": 0.145,
+      "step": 100
+    },
+    {
+      "epoch": 12.222222222222221,
+      "grad_norm": 0.04217207431793213,
+      "learning_rate": 3.7037037037037037e-05,
+      "loss": 0.6642,
+      "step": 110
+    },
+    {
+      "epoch": 12.222222222222221,
+      "eval_loss": 1.2141155004501343,
+      "eval_runtime": 34.6053,
+      "eval_samples_per_second": 1.04,
+      "eval_steps_per_second": 0.144,
+      "step": 110
+    },
+    {
+      "epoch": 13.333333333333334,
+      "grad_norm": 0.04223904013633728,
+      "learning_rate": 2.2222222222222223e-05,
+      "loss": 0.6353,
+      "step": 120
+    },
+    {
+      "epoch": 13.333333333333334,
+      "eval_loss": 1.20501708984375,
+      "eval_runtime": 34.6447,
+      "eval_samples_per_second": 1.039,
+      "eval_steps_per_second": 0.144,
+      "step": 120
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 135,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 15,
+  "save_steps": 10,
+  "stateful_callbacks": {
+    "EarlyStoppingCallback": {
+      "args": {
+        "early_stopping_patience": 3,
+        "early_stopping_threshold": 0.0
+      },
+      "attributes": {
+        "early_stopping_patience_counter": 0
+      }
+    },
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 3.2063182503346176e+16,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-120/training_args.bin b/checkpoint-120/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..db8c5d32a4f54d0e9ab4e7a985fbaee5d6701ecc
--- /dev/null
+++ b/checkpoint-120/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:79c753ff1ba946038f620bad3e42a35ce583c9e8ed52b49fd22fb6614fea0f43
+size 5112
diff --git a/checkpoint-130/README.md b/checkpoint-130/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..2d1596ffe16e4d5bdcdf0e1d4322e6667af95962
--- /dev/null
+++ b/checkpoint-130/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: TheBloke/Llama-2-7B-fp16
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.12.0
\ No newline at end of file
diff --git a/checkpoint-130/adapter_config.json b/checkpoint-130/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..cbf93f2809e43fe18fd6ad23406293a68e7f5c98
--- /dev/null
+++ b/checkpoint-130/adapter_config.json
@@ -0,0 +1,31 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "TheBloke/Llama-2-7B-fp16",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "k_proj",
+    "v_proj",
+    "o_proj",
+    "q_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-130/adapter_model.safetensors b/checkpoint-130/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b4f9fb90e930a7899b68e1787dce408f742c6dde
--- /dev/null
+++ b/checkpoint-130/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:58a8f71955b57e8e1082d6f6f180646592764af968e225bda2b732733e64a34c
+size 67143296
diff --git a/checkpoint-130/optimizer.pt b/checkpoint-130/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..11988fcadeca2673214a0285e4c846ac04d4f1ae
--- /dev/null
+++ b/checkpoint-130/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4daf30044a564bcf59ea2751408ea8dce0ec90f49d285e08583c20de07041a11
+size 134433530
diff --git a/checkpoint-130/rng_state.pth b/checkpoint-130/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..10c9d602a70402bac855f7680b6ab4917c70e41a
--- /dev/null
+++ b/checkpoint-130/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ae41843bfbaf9bfb2da44e736c97098eebc3d8816f1d5ca8fbf5df92c5f45233
+size 14244
diff --git a/checkpoint-130/scheduler.pt b/checkpoint-130/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6cdb223c58ffea4e674469ccff687934d4d7f279
--- /dev/null
+++ b/checkpoint-130/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c9b0d7953ae2a99e745d016a7542f93f08a2bf1f998ff2ee97a233ec022e1d0b
+size 1064
diff --git a/checkpoint-130/trainer_state.json b/checkpoint-130/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..f86474c05c992b2a7868efa880e894f56bb20e75
--- /dev/null
+++ b/checkpoint-130/trainer_state.json
@@ -0,0 +1,237 @@
+{
+  "best_metric": 1.2035548686981201,
+  "best_model_checkpoint": "/kaggle/working/checkpoint-130",
+  "epoch": 14.444444444444445,
+  "eval_steps": 10,
+  "global_step": 130,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.1111111111111112,
+      "grad_norm": 0.022282764315605164,
+      "learning_rate": 0.0001851851851851852,
+      "loss": 2.0424,
+      "step": 10
+    },
+    {
+      "epoch": 1.1111111111111112,
+      "eval_loss": 1.733155369758606,
+      "eval_runtime": 34.5543,
+      "eval_samples_per_second": 1.042,
+      "eval_steps_per_second": 0.145,
+      "step": 10
+    },
+    {
+      "epoch": 2.2222222222222223,
+      "grad_norm": 0.018981408327817917,
+      "learning_rate": 0.00017037037037037037,
+      "loss": 1.6072,
+      "step": 20
+    },
+    {
+      "epoch": 2.2222222222222223,
+      "eval_loss": 1.5428930521011353,
+      "eval_runtime": 34.6485,
+      "eval_samples_per_second": 1.039,
+      "eval_steps_per_second": 0.144,
+      "step": 20
+    },
+    {
+      "epoch": 3.3333333333333335,
+      "grad_norm": 0.023157037794589996,
+      "learning_rate": 0.00015555555555555556,
+      "loss": 1.4025,
+      "step": 30
+    },
+    {
+      "epoch": 3.3333333333333335,
+      "eval_loss": 1.4176721572875977,
+      "eval_runtime": 34.5433,
+      "eval_samples_per_second": 1.042,
+      "eval_steps_per_second": 0.145,
+      "step": 30
+    },
+    {
+      "epoch": 4.444444444444445,
+      "grad_norm": 0.021338749676942825,
+      "learning_rate": 0.00014074074074074076,
+      "loss": 1.285,
+      "step": 40
+    },
+    {
+      "epoch": 4.444444444444445,
+      "eval_loss": 1.3449772596359253,
+      "eval_runtime": 34.5594,
+      "eval_samples_per_second": 1.042,
+      "eval_steps_per_second": 0.145,
+      "step": 40
+    },
+    {
+      "epoch": 5.555555555555555,
+      "grad_norm": 0.02489505708217621,
+      "learning_rate": 0.00012592592592592592,
+      "loss": 1.1687,
+      "step": 50
+    },
+    {
+      "epoch": 5.555555555555555,
+      "eval_loss": 1.2951068878173828,
+      "eval_runtime": 34.5896,
+      "eval_samples_per_second": 1.041,
+      "eval_steps_per_second": 0.145,
+      "step": 50
+    },
+    {
+      "epoch": 6.666666666666667,
+      "grad_norm": 0.028962766751646996,
+      "learning_rate": 0.00011111111111111112,
+      "loss": 1.0521,
+      "step": 60
+    },
+    {
+      "epoch": 6.666666666666667,
+      "eval_loss": 1.2674343585968018,
+      "eval_runtime": 34.5586,
+      "eval_samples_per_second": 1.042,
+      "eval_steps_per_second": 0.145,
+      "step": 60
+    },
+    {
+      "epoch": 7.777777777777778,
+      "grad_norm": 0.033917125314474106,
+      "learning_rate": 9.62962962962963e-05,
+      "loss": 0.9885,
+      "step": 70
+    },
+    {
+      "epoch": 7.777777777777778,
+      "eval_loss": 1.2424466609954834,
+      "eval_runtime": 34.5412,
+      "eval_samples_per_second": 1.042,
+      "eval_steps_per_second": 0.145,
+      "step": 70
+    },
+    {
+      "epoch": 8.88888888888889,
+      "grad_norm": 0.03393130004405975,
+      "learning_rate": 8.148148148148148e-05,
+      "loss": 0.8784,
+      "step": 80
+    },
+    {
+      "epoch": 8.88888888888889,
+      "eval_loss": 1.2252851724624634,
+      "eval_runtime": 34.58,
+      "eval_samples_per_second": 1.041,
+      "eval_steps_per_second": 0.145,
+      "step": 80
+    },
+    {
+      "epoch": 10.0,
+      "grad_norm": 0.04081139340996742,
+      "learning_rate": 6.666666666666667e-05,
+      "loss": 0.8154,
+      "step": 90
+    },
+    {
+      "epoch": 10.0,
+      "eval_loss": 1.2115424871444702,
+      "eval_runtime": 34.5784,
+      "eval_samples_per_second": 1.041,
+      "eval_steps_per_second": 0.145,
+      "step": 90
+    },
+    {
+      "epoch": 11.11111111111111,
+      "grad_norm": 0.04114004969596863,
+      "learning_rate": 5.185185185185185e-05,
+      "loss": 0.7376,
+      "step": 100
+    },
+    {
+      "epoch": 11.11111111111111,
+      "eval_loss": 1.2147088050842285,
+      "eval_runtime": 34.595,
+      "eval_samples_per_second": 1.041,
+      "eval_steps_per_second": 0.145,
+      "step": 100
+    },
+    {
+      "epoch": 12.222222222222221,
+      "grad_norm": 0.04217207431793213,
+      "learning_rate": 3.7037037037037037e-05,
+      "loss": 0.6642,
+      "step": 110
+    },
+    {
+      "epoch": 12.222222222222221,
+      "eval_loss": 1.2141155004501343,
+      "eval_runtime": 34.6053,
+      "eval_samples_per_second": 1.04,
+      "eval_steps_per_second": 0.144,
+      "step": 110
+    },
+    {
+      "epoch": 13.333333333333334,
+      "grad_norm": 0.04223904013633728,
+      "learning_rate": 2.2222222222222223e-05,
+      "loss": 0.6353,
+      "step": 120
+    },
+    {
+      "epoch": 13.333333333333334,
+      "eval_loss": 1.20501708984375,
+      "eval_runtime": 34.6447,
+      "eval_samples_per_second": 1.039,
+      "eval_steps_per_second": 0.144,
+      "step": 120
+    },
+    {
+      "epoch": 14.444444444444445,
+      "grad_norm": 0.04531875252723694,
+      "learning_rate": 7.4074074074074075e-06,
+      "loss": 0.5992,
+      "step": 130
+    },
+    {
+      "epoch": 14.444444444444445,
+      "eval_loss": 1.2035548686981201,
+      "eval_runtime": 34.5919,
+      "eval_samples_per_second": 1.041,
+      "eval_steps_per_second": 0.145,
+      "step": 130
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 135,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 15,
+  "save_steps": 10,
+  "stateful_callbacks": {
+    "EarlyStoppingCallback": {
+      "args": {
+        "early_stopping_patience": 3,
+        "early_stopping_threshold": 0.0
+      },
+      "attributes": {
+        "early_stopping_patience_counter": 0
+      }
+    },
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 3.474229492560691e+16,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-130/training_args.bin b/checkpoint-130/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..db8c5d32a4f54d0e9ab4e7a985fbaee5d6701ecc
--- /dev/null
+++ b/checkpoint-130/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:79c753ff1ba946038f620bad3e42a35ce583c9e8ed52b49fd22fb6614fea0f43
+size 5112
diff --git a/checkpoint-20/README.md b/checkpoint-20/README.md
index d94fa0fb36d5aa76962cf3fda3ca0bfe4c2fa517..2d1596ffe16e4d5bdcdf0e1d4322e6667af95962 100644
--- a/checkpoint-20/README.md
+++ b/checkpoint-20/README.md
@@ -1,6 +1,6 @@
 ---
-base_model: TheBloke/Llama-2-7B-fp16
 library_name: peft
+base_model: TheBloke/Llama-2-7B-fp16
 ---
 
 # Model Card for Model ID
diff --git a/checkpoint-20/adapter_config.json b/checkpoint-20/adapter_config.json
index 923a080092a0575b2d3304503832fcf7c77462f4..cbf93f2809e43fe18fd6ad23406293a68e7f5c98 100644
--- a/checkpoint-20/adapter_config.json
+++ b/checkpoint-20/adapter_config.json
@@ -20,10 +20,10 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "k_proj",
     "v_proj",
-    "q_proj",
     "o_proj",
-    "k_proj"
+    "q_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
diff --git a/checkpoint-20/adapter_model.safetensors b/checkpoint-20/adapter_model.safetensors
index 06513132c03fe764076772f7ca590751400c2853..8223e0ce8d841941324b29f8c42fc45abdc14765 100644
--- a/checkpoint-20/adapter_model.safetensors
+++ b/checkpoint-20/adapter_model.safetensors
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:06e781f63931f2a767880f6d4b91f140eb5043f4a642a1a68d8adbb81bbcae9d
+oid sha256:b330fca16a5ab17049bdd4e6e42df52bd8bd0ac1fbeb1577f683e7b1212f0d83
 size 67143296
diff --git a/checkpoint-20/optimizer.pt b/checkpoint-20/optimizer.pt
index c1124b3ffb9edcb96b7c30f63f1ce1072adfe1a0..f4d35d60066f0933a1fd192740c0a094a9d75120 100644
--- a/checkpoint-20/optimizer.pt
+++ b/checkpoint-20/optimizer.pt
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3c2d558714cf4e61497566f22afcd3dd08e9e361ad958db0e013352981bfea11
+oid sha256:83e5faf17f41fc7dcd6907ecd97bc5e5b82bfa377a6683ddd9d25e9c5a4f3efd
 size 134433530
diff --git a/checkpoint-20/scheduler.pt b/checkpoint-20/scheduler.pt
index b763c3c2c2e02a82b650f9eee98c8def87c1ad7b..4f536414baf7d040b7ee6765e42c28f6d23eb06d 100644
--- a/checkpoint-20/scheduler.pt
+++ b/checkpoint-20/scheduler.pt
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ebdc6c179af92eaaefe04390084f5c059665a54c6acce66616e26194c4f7008a
+oid sha256:18bf434d2955f164730aed9c7994cdd6c773ed80052fc3ef975fae4adef61283
 size 1064
diff --git a/checkpoint-20/trainer_state.json b/checkpoint-20/trainer_state.json
index 6cf871e71df9c7807e59faaa6a247dccce1e7a9c..fbb6a3c50e3d893d3f94028d5087a658e6bbc7c2 100644
--- a/checkpoint-20/trainer_state.json
+++ b/checkpoint-20/trainer_state.json
@@ -1,5 +1,5 @@
 {
-  "best_metric": 1.6234142780303955,
+  "best_metric": 1.5428930521011353,
   "best_model_checkpoint": "/kaggle/working/checkpoint-20",
   "epoch": 2.2222222222222223,
   "eval_steps": 10,
@@ -10,39 +10,39 @@
   "log_history": [
     {
       "epoch": 1.1111111111111112,
-      "grad_norm": 0.022207504138350487,
-      "learning_rate": 0.00012592592592592592,
-      "loss": 2.058,
+      "grad_norm": 0.022282764315605164,
+      "learning_rate": 0.0001851851851851852,
+      "loss": 2.0424,
       "step": 10
     },
     {
       "epoch": 1.1111111111111112,
-      "eval_loss": 1.769914984703064,
-      "eval_runtime": 34.8115,
-      "eval_samples_per_second": 1.034,
-      "eval_steps_per_second": 0.144,
+      "eval_loss": 1.733155369758606,
+      "eval_runtime": 34.5543,
+      "eval_samples_per_second": 1.042,
+      "eval_steps_per_second": 0.145,
       "step": 10
     },
     {
       "epoch": 2.2222222222222223,
-      "grad_norm": 0.022052476182579994,
-      "learning_rate": 5.185185185185185e-05,
-      "loss": 1.6674,
+      "grad_norm": 0.018981408327817917,
+      "learning_rate": 0.00017037037037037037,
+      "loss": 1.6072,
       "step": 20
     },
     {
       "epoch": 2.2222222222222223,
-      "eval_loss": 1.6234142780303955,
-      "eval_runtime": 34.7607,
-      "eval_samples_per_second": 1.036,
+      "eval_loss": 1.5428930521011353,
+      "eval_runtime": 34.6485,
+      "eval_samples_per_second": 1.039,
       "eval_steps_per_second": 0.144,
       "step": 20
     }
   ],
   "logging_steps": 10,
-  "max_steps": 27,
+  "max_steps": 135,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 3,
+  "num_train_epochs": 15,
   "save_steps": 10,
   "stateful_callbacks": {
     "EarlyStoppingCallback": {
diff --git a/checkpoint-20/training_args.bin b/checkpoint-20/training_args.bin
index ac046aa3239775eec549604f7b57242cef974c25..db8c5d32a4f54d0e9ab4e7a985fbaee5d6701ecc 100644
--- a/checkpoint-20/training_args.bin
+++ b/checkpoint-20/training_args.bin
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6729829c35b3a85ce1b9965237f3a3b4ff249959cfad240042e0219e283812ed
+oid sha256:79c753ff1ba946038f620bad3e42a35ce583c9e8ed52b49fd22fb6614fea0f43
 size 5112
diff --git a/checkpoint-30/README.md b/checkpoint-30/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..2d1596ffe16e4d5bdcdf0e1d4322e6667af95962
--- /dev/null
+++ b/checkpoint-30/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: TheBloke/Llama-2-7B-fp16
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.12.0
\ No newline at end of file
diff --git a/checkpoint-30/adapter_config.json b/checkpoint-30/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..cbf93f2809e43fe18fd6ad23406293a68e7f5c98
--- /dev/null
+++ b/checkpoint-30/adapter_config.json
@@ -0,0 +1,31 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "TheBloke/Llama-2-7B-fp16",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "k_proj",
+    "v_proj",
+    "o_proj",
+    "q_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-30/adapter_model.safetensors b/checkpoint-30/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..2dde285768e1a4318a4612f90065ca618c5bdffc
--- /dev/null
+++ b/checkpoint-30/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:473c4bd6da7c40c632f6b8a627a42ab5e26f5bc3b977b422287013a48424e372
+size 67143296
diff --git a/checkpoint-30/optimizer.pt b/checkpoint-30/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c08c60df1dba8ab4b623f9330e698d2be010f492
--- /dev/null
+++ b/checkpoint-30/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:09a69f8968f0b0dc5be6245d8969030cda587b4b349b62c8e5c647bd89875fa2
+size 134433530
diff --git a/checkpoint-30/rng_state.pth b/checkpoint-30/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..9d5d7d167cef9e301898f6825f88ed06664db22b
--- /dev/null
+++ b/checkpoint-30/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5db3958c8bba66b9da4e7dd565b9b2233d51d750c94fc093ab71c68c8d043d29
+size 14244
diff --git a/checkpoint-30/scheduler.pt b/checkpoint-30/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fc722270e58bfcbf5b68dd4051aa690f4a07fbeb
--- /dev/null
+++ b/checkpoint-30/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d8a3ad18d2d099317927f68afa34f1d799ef6644ec4025b52033947a650a29e2
+size 1064
diff --git a/checkpoint-30/trainer_state.json b/checkpoint-30/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..e6276205b85275a723731d05c8c327a2a3db50b4
--- /dev/null
+++ b/checkpoint-30/trainer_state.json
@@ -0,0 +1,87 @@
+{
+  "best_metric": 1.4176721572875977,
+  "best_model_checkpoint": "/kaggle/working/checkpoint-30",
+  "epoch": 3.3333333333333335,
+  "eval_steps": 10,
+  "global_step": 30,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.1111111111111112,
+      "grad_norm": 0.022282764315605164,
+      "learning_rate": 0.0001851851851851852,
+      "loss": 2.0424,
+      "step": 10
+    },
+    {
+      "epoch": 1.1111111111111112,
+      "eval_loss": 1.733155369758606,
+      "eval_runtime": 34.5543,
+      "eval_samples_per_second": 1.042,
+      "eval_steps_per_second": 0.145,
+      "step": 10
+    },
+    {
+      "epoch": 2.2222222222222223,
+      "grad_norm": 0.018981408327817917,
+      "learning_rate": 0.00017037037037037037,
+      "loss": 1.6072,
+      "step": 20
+    },
+    {
+      "epoch": 2.2222222222222223,
+      "eval_loss": 1.5428930521011353,
+      "eval_runtime": 34.6485,
+      "eval_samples_per_second": 1.039,
+      "eval_steps_per_second": 0.144,
+      "step": 20
+    },
+    {
+      "epoch": 3.3333333333333335,
+      "grad_norm": 0.023157037794589996,
+      "learning_rate": 0.00015555555555555556,
+      "loss": 1.4025,
+      "step": 30
+    },
+    {
+      "epoch": 3.3333333333333335,
+      "eval_loss": 1.4176721572875977,
+      "eval_runtime": 34.5433,
+      "eval_samples_per_second": 1.042,
+      "eval_steps_per_second": 0.145,
+      "step": 30
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 135,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 15,
+  "save_steps": 10,
+  "stateful_callbacks": {
+    "EarlyStoppingCallback": {
+      "args": {
+        "early_stopping_patience": 3,
+        "early_stopping_threshold": 0.0
+      },
+      "attributes": {
+        "early_stopping_patience_counter": 0
+      }
+    },
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 8007449233588224.0,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-30/training_args.bin b/checkpoint-30/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..db8c5d32a4f54d0e9ab4e7a985fbaee5d6701ecc
--- /dev/null
+++ b/checkpoint-30/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:79c753ff1ba946038f620bad3e42a35ce583c9e8ed52b49fd22fb6614fea0f43
+size 5112
diff --git a/checkpoint-40/README.md b/checkpoint-40/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..2d1596ffe16e4d5bdcdf0e1d4322e6667af95962
--- /dev/null
+++ b/checkpoint-40/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: TheBloke/Llama-2-7B-fp16
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.12.0
\ No newline at end of file
diff --git a/checkpoint-40/adapter_config.json b/checkpoint-40/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..cbf93f2809e43fe18fd6ad23406293a68e7f5c98
--- /dev/null
+++ b/checkpoint-40/adapter_config.json
@@ -0,0 +1,31 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "TheBloke/Llama-2-7B-fp16",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "k_proj",
+    "v_proj",
+    "o_proj",
+    "q_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-40/adapter_model.safetensors b/checkpoint-40/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d92565a5b837388e70e09742cd836ed04bcbaafb
--- /dev/null
+++ b/checkpoint-40/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:801d3ce570c9b9ff575f960af3a00ccd6ba6d98cf418a3b2d0dc14a716f5dc87
+size 67143296
diff --git a/checkpoint-40/optimizer.pt b/checkpoint-40/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7bbafe6bb0c6557cdfc17a015eac9b12c17845c9
--- /dev/null
+++ b/checkpoint-40/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:67c5b4e8da000e9b0681d015858d5f844f66483488fdbbc351086278b492e7ad
+size 134433530
diff --git a/checkpoint-40/rng_state.pth b/checkpoint-40/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..38dc6b1b002ae88565c9cbb1471af25537930750
--- /dev/null
+++ b/checkpoint-40/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:30d6e73f3b9fa7d8e374b585bd95a623039b7a42a7db62f2bdba34e6c0b4145f
+size 14244
diff --git a/checkpoint-40/scheduler.pt b/checkpoint-40/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2d7557681fbd1043af6145a07414467654c47a85
--- /dev/null
+++ b/checkpoint-40/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fb020d6653a8f320452374f8532b2d5f261a1314bca08cdf554ed4cd89610334
+size 1064
diff --git a/checkpoint-40/trainer_state.json b/checkpoint-40/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..86d21c3a21736393bc1efdc29445b1408623b62b
--- /dev/null
+++ b/checkpoint-40/trainer_state.json
@@ -0,0 +1,102 @@
+{
+  "best_metric": 1.3449772596359253,
+  "best_model_checkpoint": "/kaggle/working/checkpoint-40",
+  "epoch": 4.444444444444445,
+  "eval_steps": 10,
+  "global_step": 40,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.1111111111111112,
+      "grad_norm": 0.022282764315605164,
+      "learning_rate": 0.0001851851851851852,
+      "loss": 2.0424,
+      "step": 10
+    },
+    {
+      "epoch": 1.1111111111111112,
+      "eval_loss": 1.733155369758606,
+      "eval_runtime": 34.5543,
+      "eval_samples_per_second": 1.042,
+      "eval_steps_per_second": 0.145,
+      "step": 10
+    },
+    {
+      "epoch": 2.2222222222222223,
+      "grad_norm": 0.018981408327817917,
+      "learning_rate": 0.00017037037037037037,
+      "loss": 1.6072,
+      "step": 20
+    },
+    {
+      "epoch": 2.2222222222222223,
+      "eval_loss": 1.5428930521011353,
+      "eval_runtime": 34.6485,
+      "eval_samples_per_second": 1.039,
+      "eval_steps_per_second": 0.144,
+      "step": 20
+    },
+    {
+      "epoch": 3.3333333333333335,
+      "grad_norm": 0.023157037794589996,
+      "learning_rate": 0.00015555555555555556,
+      "loss": 1.4025,
+      "step": 30
+    },
+    {
+      "epoch": 3.3333333333333335,
+      "eval_loss": 1.4176721572875977,
+      "eval_runtime": 34.5433,
+      "eval_samples_per_second": 1.042,
+      "eval_steps_per_second": 0.145,
+      "step": 30
+    },
+    {
+      "epoch": 4.444444444444445,
+      "grad_norm": 0.021338749676942825,
+      "learning_rate": 0.00014074074074074076,
+      "loss": 1.285,
+      "step": 40
+    },
+    {
+      "epoch": 4.444444444444445,
+      "eval_loss": 1.3449772596359253,
+      "eval_runtime": 34.5594,
+      "eval_samples_per_second": 1.042,
+      "eval_steps_per_second": 0.145,
+      "step": 40
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 135,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 15,
+  "save_steps": 10,
+  "stateful_callbacks": {
+    "EarlyStoppingCallback": {
+      "args": {
+        "early_stopping_patience": 3,
+        "early_stopping_threshold": 0.0
+      },
+      "attributes": {
+        "early_stopping_patience_counter": 0
+      }
+    },
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.0684017993449472e+16,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-40/training_args.bin b/checkpoint-40/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..db8c5d32a4f54d0e9ab4e7a985fbaee5d6701ecc
--- /dev/null
+++ b/checkpoint-40/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:79c753ff1ba946038f620bad3e42a35ce583c9e8ed52b49fd22fb6614fea0f43
+size 5112
diff --git a/checkpoint-50/README.md b/checkpoint-50/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..2d1596ffe16e4d5bdcdf0e1d4322e6667af95962
--- /dev/null
+++ b/checkpoint-50/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: TheBloke/Llama-2-7B-fp16
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.12.0
\ No newline at end of file
diff --git a/checkpoint-50/adapter_config.json b/checkpoint-50/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..cbf93f2809e43fe18fd6ad23406293a68e7f5c98
--- /dev/null
+++ b/checkpoint-50/adapter_config.json
@@ -0,0 +1,31 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "TheBloke/Llama-2-7B-fp16",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "k_proj",
+    "v_proj",
+    "o_proj",
+    "q_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-50/adapter_model.safetensors b/checkpoint-50/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d0691d9e050f0256809f29f86e24616a0fc291e2
--- /dev/null
+++ b/checkpoint-50/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:462ccb4de1a6868b9c314278aa410416a2271f7f6242771f282ecb17db91aa9a
+size 67143296
diff --git a/checkpoint-50/optimizer.pt b/checkpoint-50/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ce706c076113ef5a730068e774f4cb258e862101
--- /dev/null
+++ b/checkpoint-50/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9cc1bc1ee11e2e518af04ea6813d8a018c28d601435ab40fb94b4429e7463a6f
+size 134433530
diff --git a/checkpoint-50/rng_state.pth b/checkpoint-50/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..eaa5f8c2b84b2e8b46fcadd43baec18ad8bfbbc7
--- /dev/null
+++ b/checkpoint-50/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:37f759d78abce0f2d67fdd5ef876de5fc9f78841bcec9f9875cf9d75dc7947b6
+size 14244
diff --git a/checkpoint-50/scheduler.pt b/checkpoint-50/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..66f83e8d1a29d1712842e93ebeca9509a1bf2fb0
--- /dev/null
+++ b/checkpoint-50/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:965332493d5d51377ce3dafe8e14c60e285cd11a8955550ce87e8ef7114ed890
+size 1064
diff --git a/checkpoint-50/trainer_state.json b/checkpoint-50/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..b4358c39fda6ddc437753ee9371e20b2c8bb340b
--- /dev/null
+++ b/checkpoint-50/trainer_state.json
@@ -0,0 +1,117 @@
+{
+  "best_metric": 1.2951068878173828,
+  "best_model_checkpoint": "/kaggle/working/checkpoint-50",
+  "epoch": 5.555555555555555,
+  "eval_steps": 10,
+  "global_step": 50,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.1111111111111112,
+      "grad_norm": 0.022282764315605164,
+      "learning_rate": 0.0001851851851851852,
+      "loss": 2.0424,
+      "step": 10
+    },
+    {
+      "epoch": 1.1111111111111112,
+      "eval_loss": 1.733155369758606,
+      "eval_runtime": 34.5543,
+      "eval_samples_per_second": 1.042,
+      "eval_steps_per_second": 0.145,
+      "step": 10
+    },
+    {
+      "epoch": 2.2222222222222223,
+      "grad_norm": 0.018981408327817917,
+      "learning_rate": 0.00017037037037037037,
+      "loss": 1.6072,
+      "step": 20
+    },
+    {
+      "epoch": 2.2222222222222223,
+      "eval_loss": 1.5428930521011353,
+      "eval_runtime": 34.6485,
+      "eval_samples_per_second": 1.039,
+      "eval_steps_per_second": 0.144,
+      "step": 20
+    },
+    {
+      "epoch": 3.3333333333333335,
+      "grad_norm": 0.023157037794589996,
+      "learning_rate": 0.00015555555555555556,
+      "loss": 1.4025,
+      "step": 30
+    },
+    {
+      "epoch": 3.3333333333333335,
+      "eval_loss": 1.4176721572875977,
+      "eval_runtime": 34.5433,
+      "eval_samples_per_second": 1.042,
+      "eval_steps_per_second": 0.145,
+      "step": 30
+    },
+    {
+      "epoch": 4.444444444444445,
+      "grad_norm": 0.021338749676942825,
+      "learning_rate": 0.00014074074074074076,
+      "loss": 1.285,
+      "step": 40
+    },
+    {
+      "epoch": 4.444444444444445,
+      "eval_loss": 1.3449772596359253,
+      "eval_runtime": 34.5594,
+      "eval_samples_per_second": 1.042,
+      "eval_steps_per_second": 0.145,
+      "step": 40
+    },
+    {
+      "epoch": 5.555555555555555,
+      "grad_norm": 0.02489505708217621,
+      "learning_rate": 0.00012592592592592592,
+      "loss": 1.1687,
+      "step": 50
+    },
+    {
+      "epoch": 5.555555555555555,
+      "eval_loss": 1.2951068878173828,
+      "eval_runtime": 34.5896,
+      "eval_samples_per_second": 1.041,
+      "eval_steps_per_second": 0.145,
+      "step": 50
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 135,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 15,
+  "save_steps": 10,
+  "stateful_callbacks": {
+    "EarlyStoppingCallback": {
+      "args": {
+        "early_stopping_patience": 3,
+        "early_stopping_threshold": 0.0
+      },
+      "attributes": {
+        "early_stopping_patience_counter": 0
+      }
+    },
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.3359632879910912e+16,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-50/training_args.bin b/checkpoint-50/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..db8c5d32a4f54d0e9ab4e7a985fbaee5d6701ecc
--- /dev/null
+++ b/checkpoint-50/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:79c753ff1ba946038f620bad3e42a35ce583c9e8ed52b49fd22fb6614fea0f43
+size 5112
diff --git a/checkpoint-60/README.md b/checkpoint-60/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..2d1596ffe16e4d5bdcdf0e1d4322e6667af95962
--- /dev/null
+++ b/checkpoint-60/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: TheBloke/Llama-2-7B-fp16
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.12.0
\ No newline at end of file
diff --git a/checkpoint-60/adapter_config.json b/checkpoint-60/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..cbf93f2809e43fe18fd6ad23406293a68e7f5c98
--- /dev/null
+++ b/checkpoint-60/adapter_config.json
@@ -0,0 +1,31 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "TheBloke/Llama-2-7B-fp16",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "k_proj",
+    "v_proj",
+    "o_proj",
+    "q_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-60/adapter_model.safetensors b/checkpoint-60/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e572cbaec375d03394cd3c280f91fac11a3fc2b0
--- /dev/null
+++ b/checkpoint-60/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:db2917762f8b15e3bf37ffb24d36fadc8281664684350c104e7663e270ea20dc
+size 67143296
diff --git a/checkpoint-60/optimizer.pt b/checkpoint-60/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6056f4627bc13603632015486db18c9a4c53a353
--- /dev/null
+++ b/checkpoint-60/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c6ee00713e8f75fec9ac92a487fe43f23b824b8125bd0f0413507f283ce38111
+size 134433530
diff --git a/checkpoint-60/rng_state.pth b/checkpoint-60/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..da55a8c8ec9c90fd906c57775dafb08d822354ee
--- /dev/null
+++ b/checkpoint-60/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:25d70730d443178b00076ddaa68745a66875c06caff14848ea6edca1cd7e460c
+size 14244
diff --git a/checkpoint-60/scheduler.pt b/checkpoint-60/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..26adfd2b99d76fb3c7ae41002cae269cd882ee38
--- /dev/null
+++ b/checkpoint-60/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f2dae0587e75ddf657c6d2f1bfc77ac82e3e62449951f53cbb9a382f8e039b79
+size 1064
diff --git a/checkpoint-60/trainer_state.json b/checkpoint-60/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..7a12784894d4693dd4e1e7b8da2c088a7e613802
--- /dev/null
+++ b/checkpoint-60/trainer_state.json
@@ -0,0 +1,132 @@
+{
+  "best_metric": 1.2674343585968018,
+  "best_model_checkpoint": "/kaggle/working/checkpoint-60",
+  "epoch": 6.666666666666667,
+  "eval_steps": 10,
+  "global_step": 60,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.1111111111111112,
+      "grad_norm": 0.022282764315605164,
+      "learning_rate": 0.0001851851851851852,
+      "loss": 2.0424,
+      "step": 10
+    },
+    {
+      "epoch": 1.1111111111111112,
+      "eval_loss": 1.733155369758606,
+      "eval_runtime": 34.5543,
+      "eval_samples_per_second": 1.042,
+      "eval_steps_per_second": 0.145,
+      "step": 10
+    },
+    {
+      "epoch": 2.2222222222222223,
+      "grad_norm": 0.018981408327817917,
+      "learning_rate": 0.00017037037037037037,
+      "loss": 1.6072,
+      "step": 20
+    },
+    {
+      "epoch": 2.2222222222222223,
+      "eval_loss": 1.5428930521011353,
+      "eval_runtime": 34.6485,
+      "eval_samples_per_second": 1.039,
+      "eval_steps_per_second": 0.144,
+      "step": 20
+    },
+    {
+      "epoch": 3.3333333333333335,
+      "grad_norm": 0.023157037794589996,
+      "learning_rate": 0.00015555555555555556,
+      "loss": 1.4025,
+      "step": 30
+    },
+    {
+      "epoch": 3.3333333333333335,
+      "eval_loss": 1.4176721572875977,
+      "eval_runtime": 34.5433,
+      "eval_samples_per_second": 1.042,
+      "eval_steps_per_second": 0.145,
+      "step": 30
+    },
+    {
+      "epoch": 4.444444444444445,
+      "grad_norm": 0.021338749676942825,
+      "learning_rate": 0.00014074074074074076,
+      "loss": 1.285,
+      "step": 40
+    },
+    {
+      "epoch": 4.444444444444445,
+      "eval_loss": 1.3449772596359253,
+      "eval_runtime": 34.5594,
+      "eval_samples_per_second": 1.042,
+      "eval_steps_per_second": 0.145,
+      "step": 40
+    },
+    {
+      "epoch": 5.555555555555555,
+      "grad_norm": 0.02489505708217621,
+      "learning_rate": 0.00012592592592592592,
+      "loss": 1.1687,
+      "step": 50
+    },
+    {
+      "epoch": 5.555555555555555,
+      "eval_loss": 1.2951068878173828,
+      "eval_runtime": 34.5896,
+      "eval_samples_per_second": 1.041,
+      "eval_steps_per_second": 0.145,
+      "step": 50
+    },
+    {
+      "epoch": 6.666666666666667,
+      "grad_norm": 0.028962766751646996,
+      "learning_rate": 0.00011111111111111112,
+      "loss": 1.0521,
+      "step": 60
+    },
+    {
+      "epoch": 6.666666666666667,
+      "eval_loss": 1.2674343585968018,
+      "eval_runtime": 34.5586,
+      "eval_samples_per_second": 1.042,
+      "eval_steps_per_second": 0.145,
+      "step": 60
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 135,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 15,
+  "save_steps": 10,
+  "stateful_callbacks": {
+    "EarlyStoppingCallback": {
+      "args": {
+        "early_stopping_patience": 3,
+        "early_stopping_threshold": 0.0
+      },
+      "attributes": {
+        "early_stopping_patience_counter": 0
+      }
+    },
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.6036519597572096e+16,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-60/training_args.bin b/checkpoint-60/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..db8c5d32a4f54d0e9ab4e7a985fbaee5d6701ecc
--- /dev/null
+++ b/checkpoint-60/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:79c753ff1ba946038f620bad3e42a35ce583c9e8ed52b49fd22fb6614fea0f43
+size 5112
diff --git a/checkpoint-70/README.md b/checkpoint-70/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..2d1596ffe16e4d5bdcdf0e1d4322e6667af95962
--- /dev/null
+++ b/checkpoint-70/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: TheBloke/Llama-2-7B-fp16
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.12.0
\ No newline at end of file
diff --git a/checkpoint-70/adapter_config.json b/checkpoint-70/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..cbf93f2809e43fe18fd6ad23406293a68e7f5c98
--- /dev/null
+++ b/checkpoint-70/adapter_config.json
@@ -0,0 +1,31 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "TheBloke/Llama-2-7B-fp16",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "k_proj",
+    "v_proj",
+    "o_proj",
+    "q_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-70/adapter_model.safetensors b/checkpoint-70/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7ed56b59d76b6a1da2bf2568dbfab45abde9d58f
--- /dev/null
+++ b/checkpoint-70/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf3c37297ed626b47ab9c41ebb2a26326a24e606918ef7c7fd629793854a6799
+size 67143296
diff --git a/checkpoint-70/optimizer.pt b/checkpoint-70/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..28208764b805c1768aa12d977465b294ef889535
--- /dev/null
+++ b/checkpoint-70/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c4df5d5144e456b9ea6bf7bd481d07a757eea5bf300085855555e34b8b031648
+size 134433530
diff --git a/checkpoint-70/rng_state.pth b/checkpoint-70/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e586194fc1ef6abc2c5f56248ded9928618df599
--- /dev/null
+++ b/checkpoint-70/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:372756df51e53230a016e7ffa865a7bc1529480107042eb1599dcd5226d1997b
+size 14244
diff --git a/checkpoint-70/scheduler.pt b/checkpoint-70/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7d715f69ec0008c942a0c82d8c13ce801a3ab1c3
--- /dev/null
+++ b/checkpoint-70/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0b4acdcca4bce1d839ad3b2f3830a589ce711414ff4548cbd4704149e66a014f
+size 1064
diff --git a/checkpoint-70/trainer_state.json b/checkpoint-70/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..3a48bb1be57349a537b049969fba97285f69e7b7
--- /dev/null
+++ b/checkpoint-70/trainer_state.json
@@ -0,0 +1,147 @@
+{
+  "best_metric": 1.2424466609954834,
+  "best_model_checkpoint": "/kaggle/working/checkpoint-70",
+  "epoch": 7.777777777777778,
+  "eval_steps": 10,
+  "global_step": 70,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.1111111111111112,
+      "grad_norm": 0.022282764315605164,
+      "learning_rate": 0.0001851851851851852,
+      "loss": 2.0424,
+      "step": 10
+    },
+    {
+      "epoch": 1.1111111111111112,
+      "eval_loss": 1.733155369758606,
+      "eval_runtime": 34.5543,
+      "eval_samples_per_second": 1.042,
+      "eval_steps_per_second": 0.145,
+      "step": 10
+    },
+    {
+      "epoch": 2.2222222222222223,
+      "grad_norm": 0.018981408327817917,
+      "learning_rate": 0.00017037037037037037,
+      "loss": 1.6072,
+      "step": 20
+    },
+    {
+      "epoch": 2.2222222222222223,
+      "eval_loss": 1.5428930521011353,
+      "eval_runtime": 34.6485,
+      "eval_samples_per_second": 1.039,
+      "eval_steps_per_second": 0.144,
+      "step": 20
+    },
+    {
+      "epoch": 3.3333333333333335,
+      "grad_norm": 0.023157037794589996,
+      "learning_rate": 0.00015555555555555556,
+      "loss": 1.4025,
+      "step": 30
+    },
+    {
+      "epoch": 3.3333333333333335,
+      "eval_loss": 1.4176721572875977,
+      "eval_runtime": 34.5433,
+      "eval_samples_per_second": 1.042,
+      "eval_steps_per_second": 0.145,
+      "step": 30
+    },
+    {
+      "epoch": 4.444444444444445,
+      "grad_norm": 0.021338749676942825,
+      "learning_rate": 0.00014074074074074076,
+      "loss": 1.285,
+      "step": 40
+    },
+    {
+      "epoch": 4.444444444444445,
+      "eval_loss": 1.3449772596359253,
+      "eval_runtime": 34.5594,
+      "eval_samples_per_second": 1.042,
+      "eval_steps_per_second": 0.145,
+      "step": 40
+    },
+    {
+      "epoch": 5.555555555555555,
+      "grad_norm": 0.02489505708217621,
+      "learning_rate": 0.00012592592592592592,
+      "loss": 1.1687,
+      "step": 50
+    },
+    {
+      "epoch": 5.555555555555555,
+      "eval_loss": 1.2951068878173828,
+      "eval_runtime": 34.5896,
+      "eval_samples_per_second": 1.041,
+      "eval_steps_per_second": 0.145,
+      "step": 50
+    },
+    {
+      "epoch": 6.666666666666667,
+      "grad_norm": 0.028962766751646996,
+      "learning_rate": 0.00011111111111111112,
+      "loss": 1.0521,
+      "step": 60
+    },
+    {
+      "epoch": 6.666666666666667,
+      "eval_loss": 1.2674343585968018,
+      "eval_runtime": 34.5586,
+      "eval_samples_per_second": 1.042,
+      "eval_steps_per_second": 0.145,
+      "step": 60
+    },
+    {
+      "epoch": 7.777777777777778,
+      "grad_norm": 0.033917125314474106,
+      "learning_rate": 9.62962962962963e-05,
+      "loss": 0.9885,
+      "step": 70
+    },
+    {
+      "epoch": 7.777777777777778,
+      "eval_loss": 1.2424466609954834,
+      "eval_runtime": 34.5412,
+      "eval_samples_per_second": 1.042,
+      "eval_steps_per_second": 0.145,
+      "step": 70
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 135,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 15,
+  "save_steps": 10,
+  "stateful_callbacks": {
+    "EarlyStoppingCallback": {
+      "args": {
+        "early_stopping_patience": 3,
+        "early_stopping_threshold": 0.0
+      },
+      "attributes": {
+        "early_stopping_patience_counter": 0
+      }
+    },
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.8705775328034816e+16,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-70/training_args.bin b/checkpoint-70/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..db8c5d32a4f54d0e9ab4e7a985fbaee5d6701ecc
--- /dev/null
+++ b/checkpoint-70/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:79c753ff1ba946038f620bad3e42a35ce583c9e8ed52b49fd22fb6614fea0f43
+size 5112
diff --git a/checkpoint-80/README.md b/checkpoint-80/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..2d1596ffe16e4d5bdcdf0e1d4322e6667af95962
--- /dev/null
+++ b/checkpoint-80/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: TheBloke/Llama-2-7B-fp16
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.12.0
\ No newline at end of file
diff --git a/checkpoint-80/adapter_config.json b/checkpoint-80/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..cbf93f2809e43fe18fd6ad23406293a68e7f5c98
--- /dev/null
+++ b/checkpoint-80/adapter_config.json
@@ -0,0 +1,31 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "TheBloke/Llama-2-7B-fp16",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "k_proj",
+    "v_proj",
+    "o_proj",
+    "q_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-80/adapter_model.safetensors b/checkpoint-80/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6186d5290dd28ae7e99b0fe968a081ab6d21d5b9
--- /dev/null
+++ b/checkpoint-80/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8fa6fb27f2616ec681d67ea4bdbe8a2579fb38adfa9fe8d06fc9c19524e1da7b
+size 67143296
diff --git a/checkpoint-80/optimizer.pt b/checkpoint-80/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6eb9649fce4390a4b3a402652bfce39cc3c173ea
--- /dev/null
+++ b/checkpoint-80/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3ef989a76ea9a861bed593cbd65072f76fa28bbc0ec71984d26dd8c6e0775b98
+size 134433530
diff --git a/checkpoint-80/rng_state.pth b/checkpoint-80/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..bc3745cef029c90705ca84f2589bfb1f43715a7e
--- /dev/null
+++ b/checkpoint-80/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1145244a3fab5b36667f9c722b9e7a9ea285e3d07028c024cbf3acc4e83c92db
+size 14244
diff --git a/checkpoint-80/scheduler.pt b/checkpoint-80/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0440fcc5c6c0ee35644344e92c7f5db3e29f3de6
--- /dev/null
+++ b/checkpoint-80/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9b82529da49a74f66d08569617b44d465bfa94ebb75128eba7c218e73a755220
+size 1064
diff --git a/checkpoint-80/trainer_state.json b/checkpoint-80/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..9f89f0b6522c39b15eff7ee5969d94a70d12d7ba
--- /dev/null
+++ b/checkpoint-80/trainer_state.json
@@ -0,0 +1,162 @@
+{
+  "best_metric": 1.2252851724624634,
+  "best_model_checkpoint": "/kaggle/working/checkpoint-80",
+  "epoch": 8.88888888888889,
+  "eval_steps": 10,
+  "global_step": 80,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.1111111111111112,
+      "grad_norm": 0.022282764315605164,
+      "learning_rate": 0.0001851851851851852,
+      "loss": 2.0424,
+      "step": 10
+    },
+    {
+      "epoch": 1.1111111111111112,
+      "eval_loss": 1.733155369758606,
+      "eval_runtime": 34.5543,
+      "eval_samples_per_second": 1.042,
+      "eval_steps_per_second": 0.145,
+      "step": 10
+    },
+    {
+      "epoch": 2.2222222222222223,
+      "grad_norm": 0.018981408327817917,
+      "learning_rate": 0.00017037037037037037,
+      "loss": 1.6072,
+      "step": 20
+    },
+    {
+      "epoch": 2.2222222222222223,
+      "eval_loss": 1.5428930521011353,
+      "eval_runtime": 34.6485,
+      "eval_samples_per_second": 1.039,
+      "eval_steps_per_second": 0.144,
+      "step": 20
+    },
+    {
+      "epoch": 3.3333333333333335,
+      "grad_norm": 0.023157037794589996,
+      "learning_rate": 0.00015555555555555556,
+      "loss": 1.4025,
+      "step": 30
+    },
+    {
+      "epoch": 3.3333333333333335,
+      "eval_loss": 1.4176721572875977,
+      "eval_runtime": 34.5433,
+      "eval_samples_per_second": 1.042,
+      "eval_steps_per_second": 0.145,
+      "step": 30
+    },
+    {
+      "epoch": 4.444444444444445,
+      "grad_norm": 0.021338749676942825,
+      "learning_rate": 0.00014074074074074076,
+      "loss": 1.285,
+      "step": 40
+    },
+    {
+      "epoch": 4.444444444444445,
+      "eval_loss": 1.3449772596359253,
+      "eval_runtime": 34.5594,
+      "eval_samples_per_second": 1.042,
+      "eval_steps_per_second": 0.145,
+      "step": 40
+    },
+    {
+      "epoch": 5.555555555555555,
+      "grad_norm": 0.02489505708217621,
+      "learning_rate": 0.00012592592592592592,
+      "loss": 1.1687,
+      "step": 50
+    },
+    {
+      "epoch": 5.555555555555555,
+      "eval_loss": 1.2951068878173828,
+      "eval_runtime": 34.5896,
+      "eval_samples_per_second": 1.041,
+      "eval_steps_per_second": 0.145,
+      "step": 50
+    },
+    {
+      "epoch": 6.666666666666667,
+      "grad_norm": 0.028962766751646996,
+      "learning_rate": 0.00011111111111111112,
+      "loss": 1.0521,
+      "step": 60
+    },
+    {
+      "epoch": 6.666666666666667,
+      "eval_loss": 1.2674343585968018,
+      "eval_runtime": 34.5586,
+      "eval_samples_per_second": 1.042,
+      "eval_steps_per_second": 0.145,
+      "step": 60
+    },
+    {
+      "epoch": 7.777777777777778,
+      "grad_norm": 0.033917125314474106,
+      "learning_rate": 9.62962962962963e-05,
+      "loss": 0.9885,
+      "step": 70
+    },
+    {
+      "epoch": 7.777777777777778,
+      "eval_loss": 1.2424466609954834,
+      "eval_runtime": 34.5412,
+      "eval_samples_per_second": 1.042,
+      "eval_steps_per_second": 0.145,
+      "step": 70
+    },
+    {
+      "epoch": 8.88888888888889,
+      "grad_norm": 0.03393130004405975,
+      "learning_rate": 8.148148148148148e-05,
+      "loss": 0.8784,
+      "step": 80
+    },
+    {
+      "epoch": 8.88888888888889,
+      "eval_loss": 1.2252851724624634,
+      "eval_runtime": 34.58,
+      "eval_samples_per_second": 1.041,
+      "eval_steps_per_second": 0.145,
+      "step": 80
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 135,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 15,
+  "save_steps": 10,
+  "stateful_callbacks": {
+    "EarlyStoppingCallback": {
+      "args": {
+        "early_stopping_patience": 3,
+        "early_stopping_threshold": 0.0
+      },
+      "attributes": {
+        "early_stopping_patience_counter": 0
+      }
+    },
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2.137153352269824e+16,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-80/training_args.bin b/checkpoint-80/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..db8c5d32a4f54d0e9ab4e7a985fbaee5d6701ecc
--- /dev/null
+++ b/checkpoint-80/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:79c753ff1ba946038f620bad3e42a35ce583c9e8ed52b49fd22fb6614fea0f43
+size 5112
diff --git a/checkpoint-90/README.md b/checkpoint-90/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..2d1596ffe16e4d5bdcdf0e1d4322e6667af95962
--- /dev/null
+++ b/checkpoint-90/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: TheBloke/Llama-2-7B-fp16
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.12.0
\ No newline at end of file
diff --git a/checkpoint-90/adapter_config.json b/checkpoint-90/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..cbf93f2809e43fe18fd6ad23406293a68e7f5c98
--- /dev/null
+++ b/checkpoint-90/adapter_config.json
@@ -0,0 +1,31 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "TheBloke/Llama-2-7B-fp16",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "k_proj",
+    "v_proj",
+    "o_proj",
+    "q_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-90/adapter_model.safetensors b/checkpoint-90/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..464aecd7c193abd91aa5ac6a99ad4747551b506f
--- /dev/null
+++ b/checkpoint-90/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b9c5a0c100f8bc851b10aa9fa27f71bf6aa477d8c0d7b7747dc10050dbdbe8a9
+size 67143296
diff --git a/checkpoint-90/optimizer.pt b/checkpoint-90/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0d21653ed911ba9a59aa948e3d510b5ca26671a9
--- /dev/null
+++ b/checkpoint-90/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:56a16c09a2c26f51affd932a4f61ddef77f90f2764342dfcc95095b133132790
+size 134433530
diff --git a/checkpoint-90/rng_state.pth b/checkpoint-90/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..554718b706d63a3f48155e8c454f03cbb05b6703
--- /dev/null
+++ b/checkpoint-90/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:646be11220bb4209bcc0cd870eda869caa0d4b0e01b9b5c90750d00e50c5a8bc
+size 14244
diff --git a/checkpoint-90/scheduler.pt b/checkpoint-90/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..96950aa94dbaaaa48fc31ffce5aa897ea65e909c
--- /dev/null
+++ b/checkpoint-90/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:beb0291b44310791547f87f6141cea4568622e9ffdad94f4d68fcee857c2fabe
+size 1064
diff --git a/checkpoint-90/trainer_state.json b/checkpoint-90/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..85a9b74cd54c9d40c81660a5a9479c8041b34cda
--- /dev/null
+++ b/checkpoint-90/trainer_state.json
@@ -0,0 +1,177 @@
+{
+  "best_metric": 1.2115424871444702,
+  "best_model_checkpoint": "/kaggle/working/checkpoint-90",
+  "epoch": 10.0,
+  "eval_steps": 10,
+  "global_step": 90,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.1111111111111112,
+      "grad_norm": 0.022282764315605164,
+      "learning_rate": 0.0001851851851851852,
+      "loss": 2.0424,
+      "step": 10
+    },
+    {
+      "epoch": 1.1111111111111112,
+      "eval_loss": 1.733155369758606,
+      "eval_runtime": 34.5543,
+      "eval_samples_per_second": 1.042,
+      "eval_steps_per_second": 0.145,
+      "step": 10
+    },
+    {
+      "epoch": 2.2222222222222223,
+      "grad_norm": 0.018981408327817917,
+      "learning_rate": 0.00017037037037037037,
+      "loss": 1.6072,
+      "step": 20
+    },
+    {
+      "epoch": 2.2222222222222223,
+      "eval_loss": 1.5428930521011353,
+      "eval_runtime": 34.6485,
+      "eval_samples_per_second": 1.039,
+      "eval_steps_per_second": 0.144,
+      "step": 20
+    },
+    {
+      "epoch": 3.3333333333333335,
+      "grad_norm": 0.023157037794589996,
+      "learning_rate": 0.00015555555555555556,
+      "loss": 1.4025,
+      "step": 30
+    },
+    {
+      "epoch": 3.3333333333333335,
+      "eval_loss": 1.4176721572875977,
+      "eval_runtime": 34.5433,
+      "eval_samples_per_second": 1.042,
+      "eval_steps_per_second": 0.145,
+      "step": 30
+    },
+    {
+      "epoch": 4.444444444444445,
+      "grad_norm": 0.021338749676942825,
+      "learning_rate": 0.00014074074074074076,
+      "loss": 1.285,
+      "step": 40
+    },
+    {
+      "epoch": 4.444444444444445,
+      "eval_loss": 1.3449772596359253,
+      "eval_runtime": 34.5594,
+      "eval_samples_per_second": 1.042,
+      "eval_steps_per_second": 0.145,
+      "step": 40
+    },
+    {
+      "epoch": 5.555555555555555,
+      "grad_norm": 0.02489505708217621,
+      "learning_rate": 0.00012592592592592592,
+      "loss": 1.1687,
+      "step": 50
+    },
+    {
+      "epoch": 5.555555555555555,
+      "eval_loss": 1.2951068878173828,
+      "eval_runtime": 34.5896,
+      "eval_samples_per_second": 1.041,
+      "eval_steps_per_second": 0.145,
+      "step": 50
+    },
+    {
+      "epoch": 6.666666666666667,
+      "grad_norm": 0.028962766751646996,
+      "learning_rate": 0.00011111111111111112,
+      "loss": 1.0521,
+      "step": 60
+    },
+    {
+      "epoch": 6.666666666666667,
+      "eval_loss": 1.2674343585968018,
+      "eval_runtime": 34.5586,
+      "eval_samples_per_second": 1.042,
+      "eval_steps_per_second": 0.145,
+      "step": 60
+    },
+    {
+      "epoch": 7.777777777777778,
+      "grad_norm": 0.033917125314474106,
+      "learning_rate": 9.62962962962963e-05,
+      "loss": 0.9885,
+      "step": 70
+    },
+    {
+      "epoch": 7.777777777777778,
+      "eval_loss": 1.2424466609954834,
+      "eval_runtime": 34.5412,
+      "eval_samples_per_second": 1.042,
+      "eval_steps_per_second": 0.145,
+      "step": 70
+    },
+    {
+      "epoch": 8.88888888888889,
+      "grad_norm": 0.03393130004405975,
+      "learning_rate": 8.148148148148148e-05,
+      "loss": 0.8784,
+      "step": 80
+    },
+    {
+      "epoch": 8.88888888888889,
+      "eval_loss": 1.2252851724624634,
+      "eval_runtime": 34.58,
+      "eval_samples_per_second": 1.041,
+      "eval_steps_per_second": 0.145,
+      "step": 80
+    },
+    {
+      "epoch": 10.0,
+      "grad_norm": 0.04081139340996742,
+      "learning_rate": 6.666666666666667e-05,
+      "loss": 0.8154,
+      "step": 90
+    },
+    {
+      "epoch": 10.0,
+      "eval_loss": 1.2115424871444702,
+      "eval_runtime": 34.5784,
+      "eval_samples_per_second": 1.041,
+      "eval_steps_per_second": 0.145,
+      "step": 90
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 135,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 15,
+  "save_steps": 10,
+  "stateful_callbacks": {
+    "EarlyStoppingCallback": {
+      "args": {
+        "early_stopping_patience": 3,
+        "early_stopping_threshold": 0.0
+      },
+      "attributes": {
+        "early_stopping_patience_counter": 0
+      }
+    },
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2.404714840915968e+16,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-90/training_args.bin b/checkpoint-90/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..db8c5d32a4f54d0e9ab4e7a985fbaee5d6701ecc
--- /dev/null
+++ b/checkpoint-90/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:79c753ff1ba946038f620bad3e42a35ce583c9e8ed52b49fd22fb6614fea0f43
+size 5112
diff --git a/config.json b/config.json
index 258a5dd33a9cd50769867ae18e3f1c8164c04074..80afc097aeac0205c9feeb402b87040f5d89be74 100644
--- a/config.json
+++ b/config.json
@@ -39,7 +39,7 @@
   "rope_theta": 10000.0,
   "tie_word_embeddings": false,
   "torch_dtype": "float16",
-  "transformers_version": "4.42.3",
+  "transformers_version": "4.41.2",
   "use_cache": true,
   "vocab_size": 32000
 }
diff --git a/runs/Jul31_19-17-11_5ac8ef822596/events.out.tfevents.1722453432.5ac8ef822596.34.0 b/runs/Jul31_19-17-11_5ac8ef822596/events.out.tfevents.1722453432.5ac8ef822596.34.0
new file mode 100644
index 0000000000000000000000000000000000000000..c54b0cd9d64ed9be791c6e49c9ffdba2c04b17f8
--- /dev/null
+++ b/runs/Jul31_19-17-11_5ac8ef822596/events.out.tfevents.1722453432.5ac8ef822596.34.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b5f7bb873641ef66789d00130eb603f6040b919bc37acfddcae0830bb8a51487
+size 11877