diff --git a/README.md b/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..4b6f787248182d62a16f6423f948c336352c3674
--- /dev/null
+++ b/README.md
@@ -0,0 +1,204 @@
+---
+library_name: peft
+base_model: bigcode/starcoder
+---
+
+# Model Card for StarCoder LoRA Adapter
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** bigcode/starcoder
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases, and limitations of the model. More information is needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure 
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and BibTeX information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+
+
+### Framework versions
+
+- PEFT 0.8.2
\ No newline at end of file
diff --git a/adapter_config.json b/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..ab6c617ff1322585052700834abbc593bae7c619
--- /dev/null
+++ b/adapter_config.json
@@ -0,0 +1,28 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "bigcode/starcoder",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "c_proj",
+    "c_attn",
+    "q_attn"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/adapter_model.safetensors b/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75
--- /dev/null
+++ b/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41
+size 48
diff --git a/checkpoint-100/README.md b/checkpoint-100/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..4b6f787248182d62a16f6423f948c336352c3674
--- /dev/null
+++ b/checkpoint-100/README.md
@@ -0,0 +1,204 @@
+---
+library_name: peft
+base_model: bigcode/starcoder
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases, and limitations of the model. More information is needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure 
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and BibTeX information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+
+
+### Framework versions
+
+- PEFT 0.8.2
\ No newline at end of file
diff --git a/checkpoint-100/adapter_config.json b/checkpoint-100/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..ab6c617ff1322585052700834abbc593bae7c619
--- /dev/null
+++ b/checkpoint-100/adapter_config.json
@@ -0,0 +1,28 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "bigcode/starcoder",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "c_proj",
+    "c_attn",
+    "q_attn"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-100/adapter_model.safetensors b/checkpoint-100/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75
--- /dev/null
+++ b/checkpoint-100/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41
+size 48
diff --git a/checkpoint-100/optimizer.pt b/checkpoint-100/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..dc03c082514a5c09cbe5b0e5ca3b3c4f2ea4fa90
--- /dev/null
+++ b/checkpoint-100/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1feaae461eeadb5beb9c4062de583386b3387578fda1f3fd0bf0a01207bb3a6d
+size 284628602
diff --git a/checkpoint-100/rng_state.pth b/checkpoint-100/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..d602a0b78d132dcd53f81a72e3899303ef72317c
--- /dev/null
+++ b/checkpoint-100/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d90fab8063bac27bbc03ad7b8e96092834ee7799bc4eaae9ec98aa646a6358f6
+size 14244
diff --git a/checkpoint-100/scheduler.pt b/checkpoint-100/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..318a48d667b3ecd4666dc692587f103642e3669d
--- /dev/null
+++ b/checkpoint-100/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2a1553fa3c9fa4158c1d95c29f303ea1c82da9959617b77d6296624e38c6ca27
+size 1064
diff --git a/checkpoint-100/trainer_state.json b/checkpoint-100/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..1c0b76eb461f33b7e474bb6aedee5916110a95b0
--- /dev/null
+++ b/checkpoint-100/trainer_state.json
@@ -0,0 +1,131 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 8.16326530612245,
+  "eval_steps": 20,
+  "global_step": 100,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.82,
+      "grad_norm": 0.18377472460269928,
+      "learning_rate": 2.9999999999999997e-05,
+      "loss": 1.861,
+      "step": 10
+    },
+    {
+      "epoch": 1.63,
+      "grad_norm": 0.35202744603157043,
+      "learning_rate": 5.9999999999999995e-05,
+      "loss": 1.7263,
+      "step": 20
+    },
+    {
+      "epoch": 1.63,
+      "eval_loss": 1.4457755088806152,
+      "eval_runtime": 89.8938,
+      "eval_samples_per_second": 4.305,
+      "eval_steps_per_second": 0.545,
+      "step": 20
+    },
+    {
+      "epoch": 2.45,
+      "grad_norm": 0.928983747959137,
+      "learning_rate": 8.999999999999999e-05,
+      "loss": 1.1718,
+      "step": 30
+    },
+    {
+      "epoch": 3.27,
+      "grad_norm": 0.253262996673584,
+      "learning_rate": 0.00011999999999999999,
+      "loss": 0.4789,
+      "step": 40
+    },
+    {
+      "epoch": 3.27,
+      "eval_loss": 0.3332095146179199,
+      "eval_runtime": 89.9804,
+      "eval_samples_per_second": 4.301,
+      "eval_steps_per_second": 0.545,
+      "step": 40
+    },
+    {
+      "epoch": 4.08,
+      "grad_norm": 0.12236642092466354,
+      "learning_rate": 0.00015,
+      "loss": 0.3568,
+      "step": 50
+    },
+    {
+      "epoch": 4.9,
+      "grad_norm": 0.09160923212766647,
+      "learning_rate": 0.00017999999999999998,
+      "loss": 0.3256,
+      "step": 60
+    },
+    {
+      "epoch": 4.9,
+      "eval_loss": 0.2753114104270935,
+      "eval_runtime": 90.3206,
+      "eval_samples_per_second": 4.285,
+      "eval_steps_per_second": 0.543,
+      "step": 60
+    },
+    {
+      "epoch": 5.71,
+      "grad_norm": 0.10242326557636261,
+      "learning_rate": 0.00020999999999999998,
+      "loss": 0.2841,
+      "step": 70
+    },
+    {
+      "epoch": 6.53,
+      "grad_norm": 0.1305350810289383,
+      "learning_rate": 0.00023999999999999998,
+      "loss": 0.2615,
+      "step": 80
+    },
+    {
+      "epoch": 6.53,
+      "eval_loss": 0.2476309835910797,
+      "eval_runtime": 90.525,
+      "eval_samples_per_second": 4.275,
+      "eval_steps_per_second": 0.541,
+      "step": 80
+    },
+    {
+      "epoch": 7.35,
+      "grad_norm": 0.17941106855869293,
+      "learning_rate": 0.00027,
+      "loss": 0.2216,
+      "step": 90
+    },
+    {
+      "epoch": 8.16,
+      "grad_norm": 0.20095375180244446,
+      "learning_rate": 0.0003,
+      "loss": 0.1832,
+      "step": 100
+    },
+    {
+      "epoch": 8.16,
+      "eval_loss": 0.2350914180278778,
+      "eval_runtime": 90.3919,
+      "eval_samples_per_second": 4.281,
+      "eval_steps_per_second": 0.542,
+      "step": 100
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 400,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 34,
+  "save_steps": 20,
+  "total_flos": 5.738534596761354e+17,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-100/training_args.bin b/checkpoint-100/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3f0d01d8ba12c6a725736cdf727e72ec03ea9a4f
--- /dev/null
+++ b/checkpoint-100/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:01be3c1366faeea704d7b18d02c117abdc170d0c96565a08a0f3ad9c5e7a123a
+size 4856
diff --git a/checkpoint-120/README.md b/checkpoint-120/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..4b6f787248182d62a16f6423f948c336352c3674
--- /dev/null
+++ b/checkpoint-120/README.md
@@ -0,0 +1,204 @@
+---
+library_name: peft
+base_model: bigcode/starcoder
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases, and limitations of the model. More information is needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure 
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and BibTeX information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+
+
+### Framework versions
+
+- PEFT 0.8.2
\ No newline at end of file
diff --git a/checkpoint-120/adapter_config.json b/checkpoint-120/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..ab6c617ff1322585052700834abbc593bae7c619
--- /dev/null
+++ b/checkpoint-120/adapter_config.json
@@ -0,0 +1,28 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "bigcode/starcoder",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "c_proj",
+    "c_attn",
+    "q_attn"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-120/adapter_model.safetensors b/checkpoint-120/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75
--- /dev/null
+++ b/checkpoint-120/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41
+size 48
diff --git a/checkpoint-120/optimizer.pt b/checkpoint-120/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..58012379df7fb07e00e6800310b7a0136aab36e8
--- /dev/null
+++ b/checkpoint-120/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:114dfd322ab013f62616a9a1cfb3c4b02ccf97f698e28bfa2a74a9f40856e99a
+size 284628602
diff --git a/checkpoint-120/rng_state.pth b/checkpoint-120/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..332d878bb40dd469698f0ce249139b035b5a04d9
--- /dev/null
+++ b/checkpoint-120/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5618ca5f563e0e88f407ba712395955e41529e4d54f3b9fc10e2af22327ec8cc
+size 14244
diff --git a/checkpoint-120/scheduler.pt b/checkpoint-120/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8fa93f4b997f90de52bbb57f00f5b82cee75381a
--- /dev/null
+++ b/checkpoint-120/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:915b81b2641ff3c1c29a91260eaebd9d13138742f1687cfcdc8c68bcb9ea698e
+size 1064
diff --git a/checkpoint-120/trainer_state.json b/checkpoint-120/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..164381e495b8d38526141b93902ad6c7fa1962d8
--- /dev/null
+++ b/checkpoint-120/trainer_state.json
@@ -0,0 +1,153 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 9.795918367346939,
+  "eval_steps": 20,
+  "global_step": 120,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.82,
+      "grad_norm": 0.18377472460269928,
+      "learning_rate": 2.9999999999999997e-05,
+      "loss": 1.861,
+      "step": 10
+    },
+    {
+      "epoch": 1.63,
+      "grad_norm": 0.35202744603157043,
+      "learning_rate": 5.9999999999999995e-05,
+      "loss": 1.7263,
+      "step": 20
+    },
+    {
+      "epoch": 1.63,
+      "eval_loss": 1.4457755088806152,
+      "eval_runtime": 89.8938,
+      "eval_samples_per_second": 4.305,
+      "eval_steps_per_second": 0.545,
+      "step": 20
+    },
+    {
+      "epoch": 2.45,
+      "grad_norm": 0.928983747959137,
+      "learning_rate": 8.999999999999999e-05,
+      "loss": 1.1718,
+      "step": 30
+    },
+    {
+      "epoch": 3.27,
+      "grad_norm": 0.253262996673584,
+      "learning_rate": 0.00011999999999999999,
+      "loss": 0.4789,
+      "step": 40
+    },
+    {
+      "epoch": 3.27,
+      "eval_loss": 0.3332095146179199,
+      "eval_runtime": 89.9804,
+      "eval_samples_per_second": 4.301,
+      "eval_steps_per_second": 0.545,
+      "step": 40
+    },
+    {
+      "epoch": 4.08,
+      "grad_norm": 0.12236642092466354,
+      "learning_rate": 0.00015,
+      "loss": 0.3568,
+      "step": 50
+    },
+    {
+      "epoch": 4.9,
+      "grad_norm": 0.09160923212766647,
+      "learning_rate": 0.00017999999999999998,
+      "loss": 0.3256,
+      "step": 60
+    },
+    {
+      "epoch": 4.9,
+      "eval_loss": 0.2753114104270935,
+      "eval_runtime": 90.3206,
+      "eval_samples_per_second": 4.285,
+      "eval_steps_per_second": 0.543,
+      "step": 60
+    },
+    {
+      "epoch": 5.71,
+      "grad_norm": 0.10242326557636261,
+      "learning_rate": 0.00020999999999999998,
+      "loss": 0.2841,
+      "step": 70
+    },
+    {
+      "epoch": 6.53,
+      "grad_norm": 0.1305350810289383,
+      "learning_rate": 0.00023999999999999998,
+      "loss": 0.2615,
+      "step": 80
+    },
+    {
+      "epoch": 6.53,
+      "eval_loss": 0.2476309835910797,
+      "eval_runtime": 90.525,
+      "eval_samples_per_second": 4.275,
+      "eval_steps_per_second": 0.541,
+      "step": 80
+    },
+    {
+      "epoch": 7.35,
+      "grad_norm": 0.17941106855869293,
+      "learning_rate": 0.00027,
+      "loss": 0.2216,
+      "step": 90
+    },
+    {
+      "epoch": 8.16,
+      "grad_norm": 0.20095375180244446,
+      "learning_rate": 0.0003,
+      "loss": 0.1832,
+      "step": 100
+    },
+    {
+      "epoch": 8.16,
+      "eval_loss": 0.2350914180278778,
+      "eval_runtime": 90.3919,
+      "eval_samples_per_second": 4.281,
+      "eval_steps_per_second": 0.542,
+      "step": 100
+    },
+    {
+      "epoch": 8.98,
+      "grad_norm": 0.2600422501564026,
+      "learning_rate": 0.00029,
+      "loss": 0.1441,
+      "step": 110
+    },
+    {
+      "epoch": 9.8,
+      "grad_norm": 0.20544037222862244,
+      "learning_rate": 0.00028,
+      "loss": 0.1186,
+      "step": 120
+    },
+    {
+      "epoch": 9.8,
+      "eval_loss": 0.23090216517448425,
+      "eval_runtime": 90.3144,
+      "eval_samples_per_second": 4.285,
+      "eval_steps_per_second": 0.543,
+      "step": 120
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 400,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 34,
+  "save_steps": 20,
+  "total_flos": 6.887969287129006e+17,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-120/training_args.bin b/checkpoint-120/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3f0d01d8ba12c6a725736cdf727e72ec03ea9a4f
--- /dev/null
+++ b/checkpoint-120/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:01be3c1366faeea704d7b18d02c117abdc170d0c96565a08a0f3ad9c5e7a123a
+size 4856
diff --git a/checkpoint-140/README.md b/checkpoint-140/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..4b6f787248182d62a16f6423f948c336352c3674
--- /dev/null
+++ b/checkpoint-140/README.md
@@ -0,0 +1,204 @@
+---
+library_name: peft
+base_model: bigcode/starcoder
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure 
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+
+
+### Framework versions
+
+- PEFT 0.8.2
\ No newline at end of file
diff --git a/checkpoint-140/adapter_config.json b/checkpoint-140/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..ab6c617ff1322585052700834abbc593bae7c619
--- /dev/null
+++ b/checkpoint-140/adapter_config.json
@@ -0,0 +1,28 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "bigcode/starcoder",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "c_proj",
+    "c_attn",
+    "q_attn"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-140/adapter_model.safetensors b/checkpoint-140/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75
--- /dev/null
+++ b/checkpoint-140/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41
+size 48
diff --git a/checkpoint-140/optimizer.pt b/checkpoint-140/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4c4ed057d308c0ef2c245ae7b383aa03b39eacd5
--- /dev/null
+++ b/checkpoint-140/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:222684730d062ab6a2f08b62218a814e0c9939380313556b54cf827c12929f8b
+size 284628602
diff --git a/checkpoint-140/rng_state.pth b/checkpoint-140/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a866188c4475412d39e020396dba53e10e910537
--- /dev/null
+++ b/checkpoint-140/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e2ac710919be23109fc86c5fff99822b2cb7d7cfb09bc3a5a36f390af49cb96a
+size 14244
diff --git a/checkpoint-140/scheduler.pt b/checkpoint-140/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..608ef2d9b88b439e133656d8fcce0b3141115e28
--- /dev/null
+++ b/checkpoint-140/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:86d5f7e9b17251247c39fdd59964f3693734ddb120bd20dcb453e9efae1445bd
+size 1064
diff --git a/checkpoint-140/trainer_state.json b/checkpoint-140/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..51749bc1f593c4aa4482eaf2249e70d63b248719
--- /dev/null
+++ b/checkpoint-140/trainer_state.json
@@ -0,0 +1,175 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 11.428571428571429,
+  "eval_steps": 20,
+  "global_step": 140,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.82,
+      "grad_norm": 0.18377472460269928,
+      "learning_rate": 2.9999999999999997e-05,
+      "loss": 1.861,
+      "step": 10
+    },
+    {
+      "epoch": 1.63,
+      "grad_norm": 0.35202744603157043,
+      "learning_rate": 5.9999999999999995e-05,
+      "loss": 1.7263,
+      "step": 20
+    },
+    {
+      "epoch": 1.63,
+      "eval_loss": 1.4457755088806152,
+      "eval_runtime": 89.8938,
+      "eval_samples_per_second": 4.305,
+      "eval_steps_per_second": 0.545,
+      "step": 20
+    },
+    {
+      "epoch": 2.45,
+      "grad_norm": 0.928983747959137,
+      "learning_rate": 8.999999999999999e-05,
+      "loss": 1.1718,
+      "step": 30
+    },
+    {
+      "epoch": 3.27,
+      "grad_norm": 0.253262996673584,
+      "learning_rate": 0.00011999999999999999,
+      "loss": 0.4789,
+      "step": 40
+    },
+    {
+      "epoch": 3.27,
+      "eval_loss": 0.3332095146179199,
+      "eval_runtime": 89.9804,
+      "eval_samples_per_second": 4.301,
+      "eval_steps_per_second": 0.545,
+      "step": 40
+    },
+    {
+      "epoch": 4.08,
+      "grad_norm": 0.12236642092466354,
+      "learning_rate": 0.00015,
+      "loss": 0.3568,
+      "step": 50
+    },
+    {
+      "epoch": 4.9,
+      "grad_norm": 0.09160923212766647,
+      "learning_rate": 0.00017999999999999998,
+      "loss": 0.3256,
+      "step": 60
+    },
+    {
+      "epoch": 4.9,
+      "eval_loss": 0.2753114104270935,
+      "eval_runtime": 90.3206,
+      "eval_samples_per_second": 4.285,
+      "eval_steps_per_second": 0.543,
+      "step": 60
+    },
+    {
+      "epoch": 5.71,
+      "grad_norm": 0.10242326557636261,
+      "learning_rate": 0.00020999999999999998,
+      "loss": 0.2841,
+      "step": 70
+    },
+    {
+      "epoch": 6.53,
+      "grad_norm": 0.1305350810289383,
+      "learning_rate": 0.00023999999999999998,
+      "loss": 0.2615,
+      "step": 80
+    },
+    {
+      "epoch": 6.53,
+      "eval_loss": 0.2476309835910797,
+      "eval_runtime": 90.525,
+      "eval_samples_per_second": 4.275,
+      "eval_steps_per_second": 0.541,
+      "step": 80
+    },
+    {
+      "epoch": 7.35,
+      "grad_norm": 0.17941106855869293,
+      "learning_rate": 0.00027,
+      "loss": 0.2216,
+      "step": 90
+    },
+    {
+      "epoch": 8.16,
+      "grad_norm": 0.20095375180244446,
+      "learning_rate": 0.0003,
+      "loss": 0.1832,
+      "step": 100
+    },
+    {
+      "epoch": 8.16,
+      "eval_loss": 0.2350914180278778,
+      "eval_runtime": 90.3919,
+      "eval_samples_per_second": 4.281,
+      "eval_steps_per_second": 0.542,
+      "step": 100
+    },
+    {
+      "epoch": 8.98,
+      "grad_norm": 0.2600422501564026,
+      "learning_rate": 0.00029,
+      "loss": 0.1441,
+      "step": 110
+    },
+    {
+      "epoch": 9.8,
+      "grad_norm": 0.20544037222862244,
+      "learning_rate": 0.00028,
+      "loss": 0.1186,
+      "step": 120
+    },
+    {
+      "epoch": 9.8,
+      "eval_loss": 0.23090216517448425,
+      "eval_runtime": 90.3144,
+      "eval_samples_per_second": 4.285,
+      "eval_steps_per_second": 0.543,
+      "step": 120
+    },
+    {
+      "epoch": 10.61,
+      "grad_norm": 0.2158157229423523,
+      "learning_rate": 0.00027,
+      "loss": 0.0947,
+      "step": 130
+    },
+    {
+      "epoch": 11.43,
+      "grad_norm": 0.18916285037994385,
+      "learning_rate": 0.00026,
+      "loss": 0.0768,
+      "step": 140
+    },
+    {
+      "epoch": 11.43,
+      "eval_loss": 0.24214179813861847,
+      "eval_runtime": 90.2597,
+      "eval_samples_per_second": 4.288,
+      "eval_steps_per_second": 0.543,
+      "step": 140
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 400,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 34,
+  "save_steps": 20,
+  "total_flos": 8.03366825638232e+17,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-140/training_args.bin b/checkpoint-140/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3f0d01d8ba12c6a725736cdf727e72ec03ea9a4f
--- /dev/null
+++ b/checkpoint-140/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:01be3c1366faeea704d7b18d02c117abdc170d0c96565a08a0f3ad9c5e7a123a
+size 4856
diff --git a/checkpoint-160/README.md b/checkpoint-160/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..4b6f787248182d62a16f6423f948c336352c3674
--- /dev/null
+++ b/checkpoint-160/README.md
@@ -0,0 +1,204 @@
+---
+library_name: peft
+base_model: bigcode/starcoder
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure 
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+
+
+### Framework versions
+
+- PEFT 0.8.2
\ No newline at end of file
diff --git a/checkpoint-160/adapter_config.json b/checkpoint-160/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..ab6c617ff1322585052700834abbc593bae7c619
--- /dev/null
+++ b/checkpoint-160/adapter_config.json
@@ -0,0 +1,28 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "bigcode/starcoder",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "c_proj",
+    "c_attn",
+    "q_attn"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-160/adapter_model.safetensors b/checkpoint-160/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75
--- /dev/null
+++ b/checkpoint-160/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41
+size 48
diff --git a/checkpoint-160/optimizer.pt b/checkpoint-160/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4f993ea49ff0af264820baba834c98ac39b08e71
--- /dev/null
+++ b/checkpoint-160/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2747c6579221cdf5bb28a934c1eb1fb4aff8ef6d09a8b0bd96f626ed051b467c
+size 284628602
diff --git a/checkpoint-160/rng_state.pth b/checkpoint-160/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..891e7f7ec2620dd626a23b78a6aeea99a5fd180e
--- /dev/null
+++ b/checkpoint-160/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b2db044807e15fad135cbedea4b155f149313f24f1ea48d56839a21289a56f10
+size 14244
diff --git a/checkpoint-160/scheduler.pt b/checkpoint-160/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..384d44e5e6558d886996d98c0679b2de9b934a4c
--- /dev/null
+++ b/checkpoint-160/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c6bc91ca4e04e59b6aa0def614e5d23daa02dfd514357b40e48edfe0bf128e03
+size 1064
diff --git a/checkpoint-160/trainer_state.json b/checkpoint-160/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..16357ab526feb5f2e1c6f9f7e20c8afbfb20c1d2
--- /dev/null
+++ b/checkpoint-160/trainer_state.json
@@ -0,0 +1,197 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 13.061224489795919,
+  "eval_steps": 20,
+  "global_step": 160,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.82,
+      "grad_norm": 0.18377472460269928,
+      "learning_rate": 2.9999999999999997e-05,
+      "loss": 1.861,
+      "step": 10
+    },
+    {
+      "epoch": 1.63,
+      "grad_norm": 0.35202744603157043,
+      "learning_rate": 5.9999999999999995e-05,
+      "loss": 1.7263,
+      "step": 20
+    },
+    {
+      "epoch": 1.63,
+      "eval_loss": 1.4457755088806152,
+      "eval_runtime": 89.8938,
+      "eval_samples_per_second": 4.305,
+      "eval_steps_per_second": 0.545,
+      "step": 20
+    },
+    {
+      "epoch": 2.45,
+      "grad_norm": 0.928983747959137,
+      "learning_rate": 8.999999999999999e-05,
+      "loss": 1.1718,
+      "step": 30
+    },
+    {
+      "epoch": 3.27,
+      "grad_norm": 0.253262996673584,
+      "learning_rate": 0.00011999999999999999,
+      "loss": 0.4789,
+      "step": 40
+    },
+    {
+      "epoch": 3.27,
+      "eval_loss": 0.3332095146179199,
+      "eval_runtime": 89.9804,
+      "eval_samples_per_second": 4.301,
+      "eval_steps_per_second": 0.545,
+      "step": 40
+    },
+    {
+      "epoch": 4.08,
+      "grad_norm": 0.12236642092466354,
+      "learning_rate": 0.00015,
+      "loss": 0.3568,
+      "step": 50
+    },
+    {
+      "epoch": 4.9,
+      "grad_norm": 0.09160923212766647,
+      "learning_rate": 0.00017999999999999998,
+      "loss": 0.3256,
+      "step": 60
+    },
+    {
+      "epoch": 4.9,
+      "eval_loss": 0.2753114104270935,
+      "eval_runtime": 90.3206,
+      "eval_samples_per_second": 4.285,
+      "eval_steps_per_second": 0.543,
+      "step": 60
+    },
+    {
+      "epoch": 5.71,
+      "grad_norm": 0.10242326557636261,
+      "learning_rate": 0.00020999999999999998,
+      "loss": 0.2841,
+      "step": 70
+    },
+    {
+      "epoch": 6.53,
+      "grad_norm": 0.1305350810289383,
+      "learning_rate": 0.00023999999999999998,
+      "loss": 0.2615,
+      "step": 80
+    },
+    {
+      "epoch": 6.53,
+      "eval_loss": 0.2476309835910797,
+      "eval_runtime": 90.525,
+      "eval_samples_per_second": 4.275,
+      "eval_steps_per_second": 0.541,
+      "step": 80
+    },
+    {
+      "epoch": 7.35,
+      "grad_norm": 0.17941106855869293,
+      "learning_rate": 0.00027,
+      "loss": 0.2216,
+      "step": 90
+    },
+    {
+      "epoch": 8.16,
+      "grad_norm": 0.20095375180244446,
+      "learning_rate": 0.0003,
+      "loss": 0.1832,
+      "step": 100
+    },
+    {
+      "epoch": 8.16,
+      "eval_loss": 0.2350914180278778,
+      "eval_runtime": 90.3919,
+      "eval_samples_per_second": 4.281,
+      "eval_steps_per_second": 0.542,
+      "step": 100
+    },
+    {
+      "epoch": 8.98,
+      "grad_norm": 0.2600422501564026,
+      "learning_rate": 0.00029,
+      "loss": 0.1441,
+      "step": 110
+    },
+    {
+      "epoch": 9.8,
+      "grad_norm": 0.20544037222862244,
+      "learning_rate": 0.00028,
+      "loss": 0.1186,
+      "step": 120
+    },
+    {
+      "epoch": 9.8,
+      "eval_loss": 0.23090216517448425,
+      "eval_runtime": 90.3144,
+      "eval_samples_per_second": 4.285,
+      "eval_steps_per_second": 0.543,
+      "step": 120
+    },
+    {
+      "epoch": 10.61,
+      "grad_norm": 0.2158157229423523,
+      "learning_rate": 0.00027,
+      "loss": 0.0947,
+      "step": 130
+    },
+    {
+      "epoch": 11.43,
+      "grad_norm": 0.18916285037994385,
+      "learning_rate": 0.00026,
+      "loss": 0.0768,
+      "step": 140
+    },
+    {
+      "epoch": 11.43,
+      "eval_loss": 0.24214179813861847,
+      "eval_runtime": 90.2597,
+      "eval_samples_per_second": 4.288,
+      "eval_steps_per_second": 0.543,
+      "step": 140
+    },
+    {
+      "epoch": 12.24,
+      "grad_norm": 0.22263498604297638,
+      "learning_rate": 0.00025,
+      "loss": 0.0615,
+      "step": 150
+    },
+    {
+      "epoch": 13.06,
+      "grad_norm": 0.21315976977348328,
+      "learning_rate": 0.00023999999999999998,
+      "loss": 0.054,
+      "step": 160
+    },
+    {
+      "epoch": 13.06,
+      "eval_loss": 0.25932466983795166,
+      "eval_runtime": 89.8439,
+      "eval_samples_per_second": 4.307,
+      "eval_steps_per_second": 0.545,
+      "step": 160
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 400,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 34,
+  "save_steps": 20,
+  "total_flos": 9.17609846966059e+17,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-160/training_args.bin b/checkpoint-160/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3f0d01d8ba12c6a725736cdf727e72ec03ea9a4f
--- /dev/null
+++ b/checkpoint-160/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:01be3c1366faeea704d7b18d02c117abdc170d0c96565a08a0f3ad9c5e7a123a
+size 4856
diff --git a/checkpoint-180/README.md b/checkpoint-180/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..4b6f787248182d62a16f6423f948c336352c3674
--- /dev/null
+++ b/checkpoint-180/README.md
@@ -0,0 +1,204 @@
+---
+library_name: peft
+base_model: bigcode/starcoder
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure 
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+
+
+### Framework versions
+
+- PEFT 0.8.2
\ No newline at end of file
diff --git a/checkpoint-180/adapter_config.json b/checkpoint-180/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..ab6c617ff1322585052700834abbc593bae7c619
--- /dev/null
+++ b/checkpoint-180/adapter_config.json
@@ -0,0 +1,28 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "bigcode/starcoder",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "c_proj",
+    "c_attn",
+    "q_attn"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-180/adapter_model.safetensors b/checkpoint-180/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75
--- /dev/null
+++ b/checkpoint-180/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41
+size 48
diff --git a/checkpoint-180/optimizer.pt b/checkpoint-180/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a0025ee9d809320e2c3a42ac3664fb0f4392b2c8
--- /dev/null
+++ b/checkpoint-180/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ac59b4a8215def070849b25780be46446aedd048d71d1cd8965d8b3e3c958f0a
+size 284628602
diff --git a/checkpoint-180/rng_state.pth b/checkpoint-180/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..6c90592f6140364b1eaa2a325221d598d7d9f7ed
--- /dev/null
+++ b/checkpoint-180/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c643cd616fa0e3b8fc7b1ca162a78b68d156fe98612a09f80241bf8fd9147e5d
+size 14244
diff --git a/checkpoint-180/scheduler.pt b/checkpoint-180/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..900ec9e5bde9a5e4f8680c6956fe26c89923d01b
--- /dev/null
+++ b/checkpoint-180/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f5dfc3bc75009efe48747cf9948a244ecefe4e6b758242f83bed075794e9a377
+size 1064
diff --git a/checkpoint-180/trainer_state.json b/checkpoint-180/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..1f6eefe61a895b97393132fa7d0ef16a8fb5346d
--- /dev/null
+++ b/checkpoint-180/trainer_state.json
@@ -0,0 +1,219 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 14.693877551020408,
+  "eval_steps": 20,
+  "global_step": 180,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.82,
+      "grad_norm": 0.18377472460269928,
+      "learning_rate": 2.9999999999999997e-05,
+      "loss": 1.861,
+      "step": 10
+    },
+    {
+      "epoch": 1.63,
+      "grad_norm": 0.35202744603157043,
+      "learning_rate": 5.9999999999999995e-05,
+      "loss": 1.7263,
+      "step": 20
+    },
+    {
+      "epoch": 1.63,
+      "eval_loss": 1.4457755088806152,
+      "eval_runtime": 89.8938,
+      "eval_samples_per_second": 4.305,
+      "eval_steps_per_second": 0.545,
+      "step": 20
+    },
+    {
+      "epoch": 2.45,
+      "grad_norm": 0.928983747959137,
+      "learning_rate": 8.999999999999999e-05,
+      "loss": 1.1718,
+      "step": 30
+    },
+    {
+      "epoch": 3.27,
+      "grad_norm": 0.253262996673584,
+      "learning_rate": 0.00011999999999999999,
+      "loss": 0.4789,
+      "step": 40
+    },
+    {
+      "epoch": 3.27,
+      "eval_loss": 0.3332095146179199,
+      "eval_runtime": 89.9804,
+      "eval_samples_per_second": 4.301,
+      "eval_steps_per_second": 0.545,
+      "step": 40
+    },
+    {
+      "epoch": 4.08,
+      "grad_norm": 0.12236642092466354,
+      "learning_rate": 0.00015,
+      "loss": 0.3568,
+      "step": 50
+    },
+    {
+      "epoch": 4.9,
+      "grad_norm": 0.09160923212766647,
+      "learning_rate": 0.00017999999999999998,
+      "loss": 0.3256,
+      "step": 60
+    },
+    {
+      "epoch": 4.9,
+      "eval_loss": 0.2753114104270935,
+      "eval_runtime": 90.3206,
+      "eval_samples_per_second": 4.285,
+      "eval_steps_per_second": 0.543,
+      "step": 60
+    },
+    {
+      "epoch": 5.71,
+      "grad_norm": 0.10242326557636261,
+      "learning_rate": 0.00020999999999999998,
+      "loss": 0.2841,
+      "step": 70
+    },
+    {
+      "epoch": 6.53,
+      "grad_norm": 0.1305350810289383,
+      "learning_rate": 0.00023999999999999998,
+      "loss": 0.2615,
+      "step": 80
+    },
+    {
+      "epoch": 6.53,
+      "eval_loss": 0.2476309835910797,
+      "eval_runtime": 90.525,
+      "eval_samples_per_second": 4.275,
+      "eval_steps_per_second": 0.541,
+      "step": 80
+    },
+    {
+      "epoch": 7.35,
+      "grad_norm": 0.17941106855869293,
+      "learning_rate": 0.00027,
+      "loss": 0.2216,
+      "step": 90
+    },
+    {
+      "epoch": 8.16,
+      "grad_norm": 0.20095375180244446,
+      "learning_rate": 0.0003,
+      "loss": 0.1832,
+      "step": 100
+    },
+    {
+      "epoch": 8.16,
+      "eval_loss": 0.2350914180278778,
+      "eval_runtime": 90.3919,
+      "eval_samples_per_second": 4.281,
+      "eval_steps_per_second": 0.542,
+      "step": 100
+    },
+    {
+      "epoch": 8.98,
+      "grad_norm": 0.2600422501564026,
+      "learning_rate": 0.00029,
+      "loss": 0.1441,
+      "step": 110
+    },
+    {
+      "epoch": 9.8,
+      "grad_norm": 0.20544037222862244,
+      "learning_rate": 0.00028,
+      "loss": 0.1186,
+      "step": 120
+    },
+    {
+      "epoch": 9.8,
+      "eval_loss": 0.23090216517448425,
+      "eval_runtime": 90.3144,
+      "eval_samples_per_second": 4.285,
+      "eval_steps_per_second": 0.543,
+      "step": 120
+    },
+    {
+      "epoch": 10.61,
+      "grad_norm": 0.2158157229423523,
+      "learning_rate": 0.00027,
+      "loss": 0.0947,
+      "step": 130
+    },
+    {
+      "epoch": 11.43,
+      "grad_norm": 0.18916285037994385,
+      "learning_rate": 0.00026,
+      "loss": 0.0768,
+      "step": 140
+    },
+    {
+      "epoch": 11.43,
+      "eval_loss": 0.24214179813861847,
+      "eval_runtime": 90.2597,
+      "eval_samples_per_second": 4.288,
+      "eval_steps_per_second": 0.543,
+      "step": 140
+    },
+    {
+      "epoch": 12.24,
+      "grad_norm": 0.22263498604297638,
+      "learning_rate": 0.00025,
+      "loss": 0.0615,
+      "step": 150
+    },
+    {
+      "epoch": 13.06,
+      "grad_norm": 0.21315976977348328,
+      "learning_rate": 0.00023999999999999998,
+      "loss": 0.054,
+      "step": 160
+    },
+    {
+      "epoch": 13.06,
+      "eval_loss": 0.25932466983795166,
+      "eval_runtime": 89.8439,
+      "eval_samples_per_second": 4.307,
+      "eval_steps_per_second": 0.545,
+      "step": 160
+    },
+    {
+      "epoch": 13.88,
+      "grad_norm": 0.18338361382484436,
+      "learning_rate": 0.00023,
+      "loss": 0.0455,
+      "step": 170
+    },
+    {
+      "epoch": 14.69,
+      "grad_norm": 0.17157459259033203,
+      "learning_rate": 0.00021999999999999995,
+      "loss": 0.0393,
+      "step": 180
+    },
+    {
+      "epoch": 14.69,
+      "eval_loss": 0.27233538031578064,
+      "eval_runtime": 90.1364,
+      "eval_samples_per_second": 4.293,
+      "eval_steps_per_second": 0.544,
+      "step": 180
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 400,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 34,
+  "save_steps": 20,
+  "total_flos": 1.0330202811421164e+18,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-180/training_args.bin b/checkpoint-180/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3f0d01d8ba12c6a725736cdf727e72ec03ea9a4f
--- /dev/null
+++ b/checkpoint-180/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:01be3c1366faeea704d7b18d02c117abdc170d0c96565a08a0f3ad9c5e7a123a
+size 4856
diff --git a/checkpoint-20/README.md b/checkpoint-20/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..4b6f787248182d62a16f6423f948c336352c3674
--- /dev/null
+++ b/checkpoint-20/README.md
@@ -0,0 +1,204 @@
+---
+library_name: peft
+base_model: bigcode/starcoder
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure 
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+
+
+### Framework versions
+
+- PEFT 0.8.2
\ No newline at end of file
diff --git a/checkpoint-20/adapter_config.json b/checkpoint-20/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..ab6c617ff1322585052700834abbc593bae7c619
--- /dev/null
+++ b/checkpoint-20/adapter_config.json
@@ -0,0 +1,28 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "bigcode/starcoder",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "c_proj",
+    "c_attn",
+    "q_attn"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-20/adapter_model.safetensors b/checkpoint-20/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75
--- /dev/null
+++ b/checkpoint-20/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41
+size 48
diff --git a/checkpoint-20/optimizer.pt b/checkpoint-20/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b8dd68ca6f752d2a16b4ec8e9d1cff56f20d1159
--- /dev/null
+++ b/checkpoint-20/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:df048142c25b752046df970d6dcc0693d5d7f4c4193730d6dbdeabe3af9861ad
+size 284628602
diff --git a/checkpoint-20/rng_state.pth b/checkpoint-20/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..dae5ec1cccd3f30a9db135f547386ac77e0daf33
--- /dev/null
+++ b/checkpoint-20/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2dbe6d770d526425c4ebf1c166cc93b0c0ddb2b941fd4176071da7fada6d3fe5
+size 14244
diff --git a/checkpoint-20/scheduler.pt b/checkpoint-20/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e0b30d480a76e7df4b910aa4733f5d1c8ecf4338
--- /dev/null
+++ b/checkpoint-20/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2bc30782c80f39b95ece53e16ed533d0eb1d775796dc4d2c39691e355b142ab4
+size 1064
diff --git a/checkpoint-20/trainer_state.json b/checkpoint-20/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..26f8e0734d25a2efd55dd2718e7a719099e4ef2e
--- /dev/null
+++ b/checkpoint-20/trainer_state.json
@@ -0,0 +1,43 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.6326530612244898,
+  "eval_steps": 20,
+  "global_step": 20,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.82,
+      "grad_norm": 0.18377472460269928,
+      "learning_rate": 2.9999999999999997e-05,
+      "loss": 1.861,
+      "step": 10
+    },
+    {
+      "epoch": 1.63,
+      "grad_norm": 0.35202744603157043,
+      "learning_rate": 5.9999999999999995e-05,
+      "loss": 1.7263,
+      "step": 20
+    },
+    {
+      "epoch": 1.63,
+      "eval_loss": 1.4457755088806152,
+      "eval_runtime": 89.8938,
+      "eval_samples_per_second": 4.305,
+      "eval_steps_per_second": 0.545,
+      "step": 20
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 400,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 34,
+  "save_steps": 20,
+  "total_flos": 1.1541043417605734e+17,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-20/training_args.bin b/checkpoint-20/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3f0d01d8ba12c6a725736cdf727e72ec03ea9a4f
--- /dev/null
+++ b/checkpoint-20/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:01be3c1366faeea704d7b18d02c117abdc170d0c96565a08a0f3ad9c5e7a123a
+size 4856
diff --git a/checkpoint-200/README.md b/checkpoint-200/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..4b6f787248182d62a16f6423f948c336352c3674
--- /dev/null
+++ b/checkpoint-200/README.md
@@ -0,0 +1,204 @@
+---
+library_name: peft
+base_model: bigcode/starcoder
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure 
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+
+
+### Framework versions
+
+- PEFT 0.8.2
\ No newline at end of file
diff --git a/checkpoint-200/adapter_config.json b/checkpoint-200/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..ab6c617ff1322585052700834abbc593bae7c619
--- /dev/null
+++ b/checkpoint-200/adapter_config.json
@@ -0,0 +1,28 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "bigcode/starcoder",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "c_proj",
+    "c_attn",
+    "q_attn"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-200/adapter_model.safetensors b/checkpoint-200/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75
--- /dev/null
+++ b/checkpoint-200/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41
+size 48
diff --git a/checkpoint-200/optimizer.pt b/checkpoint-200/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6c02a283116b03d2b2404681ace2c59935d5018b
--- /dev/null
+++ b/checkpoint-200/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:26cb22ac8210f02dc52c28309a4874485ef4e82e679847351f7bd4ca454bbe32
+size 284628602
diff --git a/checkpoint-200/rng_state.pth b/checkpoint-200/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..b7b964654466296f8f2f080bc3f86ca422e0afd5
--- /dev/null
+++ b/checkpoint-200/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:637237de1d6cfcc0cc43e19a5f2a0f933019b992a579e0440bc25f8954866eab
+size 14244
diff --git a/checkpoint-200/scheduler.pt b/checkpoint-200/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f613c3ff4de97ff4d7e9aa2a9f8ee26805165829
--- /dev/null
+++ b/checkpoint-200/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6795bda54303bd8bc8385fc9d0f4a2d7f6d29bd4f0ce908d2fb2991845ecc313
+size 1064
diff --git a/checkpoint-200/trainer_state.json b/checkpoint-200/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..1ba3af8fb6cd19e743e9c5597155c544c347daef
--- /dev/null
+++ b/checkpoint-200/trainer_state.json
@@ -0,0 +1,241 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 16.3265306122449,
+  "eval_steps": 20,
+  "global_step": 200,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.82,
+      "grad_norm": 0.18377472460269928,
+      "learning_rate": 2.9999999999999997e-05,
+      "loss": 1.861,
+      "step": 10
+    },
+    {
+      "epoch": 1.63,
+      "grad_norm": 0.35202744603157043,
+      "learning_rate": 5.9999999999999995e-05,
+      "loss": 1.7263,
+      "step": 20
+    },
+    {
+      "epoch": 1.63,
+      "eval_loss": 1.4457755088806152,
+      "eval_runtime": 89.8938,
+      "eval_samples_per_second": 4.305,
+      "eval_steps_per_second": 0.545,
+      "step": 20
+    },
+    {
+      "epoch": 2.45,
+      "grad_norm": 0.928983747959137,
+      "learning_rate": 8.999999999999999e-05,
+      "loss": 1.1718,
+      "step": 30
+    },
+    {
+      "epoch": 3.27,
+      "grad_norm": 0.253262996673584,
+      "learning_rate": 0.00011999999999999999,
+      "loss": 0.4789,
+      "step": 40
+    },
+    {
+      "epoch": 3.27,
+      "eval_loss": 0.3332095146179199,
+      "eval_runtime": 89.9804,
+      "eval_samples_per_second": 4.301,
+      "eval_steps_per_second": 0.545,
+      "step": 40
+    },
+    {
+      "epoch": 4.08,
+      "grad_norm": 0.12236642092466354,
+      "learning_rate": 0.00015,
+      "loss": 0.3568,
+      "step": 50
+    },
+    {
+      "epoch": 4.9,
+      "grad_norm": 0.09160923212766647,
+      "learning_rate": 0.00017999999999999998,
+      "loss": 0.3256,
+      "step": 60
+    },
+    {
+      "epoch": 4.9,
+      "eval_loss": 0.2753114104270935,
+      "eval_runtime": 90.3206,
+      "eval_samples_per_second": 4.285,
+      "eval_steps_per_second": 0.543,
+      "step": 60
+    },
+    {
+      "epoch": 5.71,
+      "grad_norm": 0.10242326557636261,
+      "learning_rate": 0.00020999999999999998,
+      "loss": 0.2841,
+      "step": 70
+    },
+    {
+      "epoch": 6.53,
+      "grad_norm": 0.1305350810289383,
+      "learning_rate": 0.00023999999999999998,
+      "loss": 0.2615,
+      "step": 80
+    },
+    {
+      "epoch": 6.53,
+      "eval_loss": 0.2476309835910797,
+      "eval_runtime": 90.525,
+      "eval_samples_per_second": 4.275,
+      "eval_steps_per_second": 0.541,
+      "step": 80
+    },
+    {
+      "epoch": 7.35,
+      "grad_norm": 0.17941106855869293,
+      "learning_rate": 0.00027,
+      "loss": 0.2216,
+      "step": 90
+    },
+    {
+      "epoch": 8.16,
+      "grad_norm": 0.20095375180244446,
+      "learning_rate": 0.0003,
+      "loss": 0.1832,
+      "step": 100
+    },
+    {
+      "epoch": 8.16,
+      "eval_loss": 0.2350914180278778,
+      "eval_runtime": 90.3919,
+      "eval_samples_per_second": 4.281,
+      "eval_steps_per_second": 0.542,
+      "step": 100
+    },
+    {
+      "epoch": 8.98,
+      "grad_norm": 0.2600422501564026,
+      "learning_rate": 0.00029,
+      "loss": 0.1441,
+      "step": 110
+    },
+    {
+      "epoch": 9.8,
+      "grad_norm": 0.20544037222862244,
+      "learning_rate": 0.00028,
+      "loss": 0.1186,
+      "step": 120
+    },
+    {
+      "epoch": 9.8,
+      "eval_loss": 0.23090216517448425,
+      "eval_runtime": 90.3144,
+      "eval_samples_per_second": 4.285,
+      "eval_steps_per_second": 0.543,
+      "step": 120
+    },
+    {
+      "epoch": 10.61,
+      "grad_norm": 0.2158157229423523,
+      "learning_rate": 0.00027,
+      "loss": 0.0947,
+      "step": 130
+    },
+    {
+      "epoch": 11.43,
+      "grad_norm": 0.18916285037994385,
+      "learning_rate": 0.00026,
+      "loss": 0.0768,
+      "step": 140
+    },
+    {
+      "epoch": 11.43,
+      "eval_loss": 0.24214179813861847,
+      "eval_runtime": 90.2597,
+      "eval_samples_per_second": 4.288,
+      "eval_steps_per_second": 0.543,
+      "step": 140
+    },
+    {
+      "epoch": 12.24,
+      "grad_norm": 0.22263498604297638,
+      "learning_rate": 0.00025,
+      "loss": 0.0615,
+      "step": 150
+    },
+    {
+      "epoch": 13.06,
+      "grad_norm": 0.21315976977348328,
+      "learning_rate": 0.00023999999999999998,
+      "loss": 0.054,
+      "step": 160
+    },
+    {
+      "epoch": 13.06,
+      "eval_loss": 0.25932466983795166,
+      "eval_runtime": 89.8439,
+      "eval_samples_per_second": 4.307,
+      "eval_steps_per_second": 0.545,
+      "step": 160
+    },
+    {
+      "epoch": 13.88,
+      "grad_norm": 0.18338361382484436,
+      "learning_rate": 0.00023,
+      "loss": 0.0455,
+      "step": 170
+    },
+    {
+      "epoch": 14.69,
+      "grad_norm": 0.17157459259033203,
+      "learning_rate": 0.00021999999999999995,
+      "loss": 0.0393,
+      "step": 180
+    },
+    {
+      "epoch": 14.69,
+      "eval_loss": 0.27233538031578064,
+      "eval_runtime": 90.1364,
+      "eval_samples_per_second": 4.293,
+      "eval_steps_per_second": 0.544,
+      "step": 180
+    },
+    {
+      "epoch": 15.51,
+      "grad_norm": 0.1541435867547989,
+      "learning_rate": 0.00020999999999999998,
+      "loss": 0.0352,
+      "step": 190
+    },
+    {
+      "epoch": 16.33,
+      "grad_norm": 0.1553652435541153,
+      "learning_rate": 0.00019999999999999998,
+      "loss": 0.0325,
+      "step": 200
+    },
+    {
+      "epoch": 16.33,
+      "eval_loss": 0.28704825043678284,
+      "eval_runtime": 89.7951,
+      "eval_samples_per_second": 4.31,
+      "eval_steps_per_second": 0.546,
+      "step": 200
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 400,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 34,
+  "save_steps": 20,
+  "total_flos": 1.1471699094420849e+18,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-200/training_args.bin b/checkpoint-200/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3f0d01d8ba12c6a725736cdf727e72ec03ea9a4f
--- /dev/null
+++ b/checkpoint-200/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:01be3c1366faeea704d7b18d02c117abdc170d0c96565a08a0f3ad9c5e7a123a
+size 4856
diff --git a/checkpoint-220/README.md b/checkpoint-220/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..4b6f787248182d62a16f6423f948c336352c3674
--- /dev/null
+++ b/checkpoint-220/README.md
@@ -0,0 +1,204 @@
+---
+library_name: peft
+base_model: bigcode/starcoder
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure 
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+
+
+### Framework versions
+
+- PEFT 0.8.2
\ No newline at end of file
diff --git a/checkpoint-220/adapter_config.json b/checkpoint-220/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..ab6c617ff1322585052700834abbc593bae7c619
--- /dev/null
+++ b/checkpoint-220/adapter_config.json
@@ -0,0 +1,28 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "bigcode/starcoder",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "c_proj",
+    "c_attn",
+    "q_attn"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-220/adapter_model.safetensors b/checkpoint-220/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75
--- /dev/null
+++ b/checkpoint-220/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41
+size 48
diff --git a/checkpoint-220/optimizer.pt b/checkpoint-220/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2384e73e60d611465eb447ba6d53a2ecfad913c3
--- /dev/null
+++ b/checkpoint-220/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c47636495e01d276ffae9896bb8c18ce73010627055cf807383088ed2dae88fc
+size 284628602
diff --git a/checkpoint-220/rng_state.pth b/checkpoint-220/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e9c8203df2d7833c6ca18a6e7b47cb8d873b8ace
--- /dev/null
+++ b/checkpoint-220/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3edbaa73ac7f30eefe8c0dd8eb537afd3ac94c6703bee94965a470000ecd437b
+size 14244
diff --git a/checkpoint-220/scheduler.pt b/checkpoint-220/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..46f9dfab9e077ea12afa1d94a7c8aff731125da3
--- /dev/null
+++ b/checkpoint-220/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cd18ed34ef6e0139641e61e552552fe67f5e90236dba94b6c8eca5985486bd06
+size 1064
diff --git a/checkpoint-220/trainer_state.json b/checkpoint-220/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..364481e9b6a9a1570acbb75ba19cacb737e94484
--- /dev/null
+++ b/checkpoint-220/trainer_state.json
@@ -0,0 +1,263 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 17.959183673469386,
+  "eval_steps": 20,
+  "global_step": 220,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.82,
+      "grad_norm": 0.18377472460269928,
+      "learning_rate": 2.9999999999999997e-05,
+      "loss": 1.861,
+      "step": 10
+    },
+    {
+      "epoch": 1.63,
+      "grad_norm": 0.35202744603157043,
+      "learning_rate": 5.9999999999999995e-05,
+      "loss": 1.7263,
+      "step": 20
+    },
+    {
+      "epoch": 1.63,
+      "eval_loss": 1.4457755088806152,
+      "eval_runtime": 89.8938,
+      "eval_samples_per_second": 4.305,
+      "eval_steps_per_second": 0.545,
+      "step": 20
+    },
+    {
+      "epoch": 2.45,
+      "grad_norm": 0.928983747959137,
+      "learning_rate": 8.999999999999999e-05,
+      "loss": 1.1718,
+      "step": 30
+    },
+    {
+      "epoch": 3.27,
+      "grad_norm": 0.253262996673584,
+      "learning_rate": 0.00011999999999999999,
+      "loss": 0.4789,
+      "step": 40
+    },
+    {
+      "epoch": 3.27,
+      "eval_loss": 0.3332095146179199,
+      "eval_runtime": 89.9804,
+      "eval_samples_per_second": 4.301,
+      "eval_steps_per_second": 0.545,
+      "step": 40
+    },
+    {
+      "epoch": 4.08,
+      "grad_norm": 0.12236642092466354,
+      "learning_rate": 0.00015,
+      "loss": 0.3568,
+      "step": 50
+    },
+    {
+      "epoch": 4.9,
+      "grad_norm": 0.09160923212766647,
+      "learning_rate": 0.00017999999999999998,
+      "loss": 0.3256,
+      "step": 60
+    },
+    {
+      "epoch": 4.9,
+      "eval_loss": 0.2753114104270935,
+      "eval_runtime": 90.3206,
+      "eval_samples_per_second": 4.285,
+      "eval_steps_per_second": 0.543,
+      "step": 60
+    },
+    {
+      "epoch": 5.71,
+      "grad_norm": 0.10242326557636261,
+      "learning_rate": 0.00020999999999999998,
+      "loss": 0.2841,
+      "step": 70
+    },
+    {
+      "epoch": 6.53,
+      "grad_norm": 0.1305350810289383,
+      "learning_rate": 0.00023999999999999998,
+      "loss": 0.2615,
+      "step": 80
+    },
+    {
+      "epoch": 6.53,
+      "eval_loss": 0.2476309835910797,
+      "eval_runtime": 90.525,
+      "eval_samples_per_second": 4.275,
+      "eval_steps_per_second": 0.541,
+      "step": 80
+    },
+    {
+      "epoch": 7.35,
+      "grad_norm": 0.17941106855869293,
+      "learning_rate": 0.00027,
+      "loss": 0.2216,
+      "step": 90
+    },
+    {
+      "epoch": 8.16,
+      "grad_norm": 0.20095375180244446,
+      "learning_rate": 0.0003,
+      "loss": 0.1832,
+      "step": 100
+    },
+    {
+      "epoch": 8.16,
+      "eval_loss": 0.2350914180278778,
+      "eval_runtime": 90.3919,
+      "eval_samples_per_second": 4.281,
+      "eval_steps_per_second": 0.542,
+      "step": 100
+    },
+    {
+      "epoch": 8.98,
+      "grad_norm": 0.2600422501564026,
+      "learning_rate": 0.00029,
+      "loss": 0.1441,
+      "step": 110
+    },
+    {
+      "epoch": 9.8,
+      "grad_norm": 0.20544037222862244,
+      "learning_rate": 0.00028,
+      "loss": 0.1186,
+      "step": 120
+    },
+    {
+      "epoch": 9.8,
+      "eval_loss": 0.23090216517448425,
+      "eval_runtime": 90.3144,
+      "eval_samples_per_second": 4.285,
+      "eval_steps_per_second": 0.543,
+      "step": 120
+    },
+    {
+      "epoch": 10.61,
+      "grad_norm": 0.2158157229423523,
+      "learning_rate": 0.00027,
+      "loss": 0.0947,
+      "step": 130
+    },
+    {
+      "epoch": 11.43,
+      "grad_norm": 0.18916285037994385,
+      "learning_rate": 0.00026,
+      "loss": 0.0768,
+      "step": 140
+    },
+    {
+      "epoch": 11.43,
+      "eval_loss": 0.24214179813861847,
+      "eval_runtime": 90.2597,
+      "eval_samples_per_second": 4.288,
+      "eval_steps_per_second": 0.543,
+      "step": 140
+    },
+    {
+      "epoch": 12.24,
+      "grad_norm": 0.22263498604297638,
+      "learning_rate": 0.00025,
+      "loss": 0.0615,
+      "step": 150
+    },
+    {
+      "epoch": 13.06,
+      "grad_norm": 0.21315976977348328,
+      "learning_rate": 0.00023999999999999998,
+      "loss": 0.054,
+      "step": 160
+    },
+    {
+      "epoch": 13.06,
+      "eval_loss": 0.25932466983795166,
+      "eval_runtime": 89.8439,
+      "eval_samples_per_second": 4.307,
+      "eval_steps_per_second": 0.545,
+      "step": 160
+    },
+    {
+      "epoch": 13.88,
+      "grad_norm": 0.18338361382484436,
+      "learning_rate": 0.00023,
+      "loss": 0.0455,
+      "step": 170
+    },
+    {
+      "epoch": 14.69,
+      "grad_norm": 0.17157459259033203,
+      "learning_rate": 0.00021999999999999995,
+      "loss": 0.0393,
+      "step": 180
+    },
+    {
+      "epoch": 14.69,
+      "eval_loss": 0.27233538031578064,
+      "eval_runtime": 90.1364,
+      "eval_samples_per_second": 4.293,
+      "eval_steps_per_second": 0.544,
+      "step": 180
+    },
+    {
+      "epoch": 15.51,
+      "grad_norm": 0.1541435867547989,
+      "learning_rate": 0.00020999999999999998,
+      "loss": 0.0352,
+      "step": 190
+    },
+    {
+      "epoch": 16.33,
+      "grad_norm": 0.1553652435541153,
+      "learning_rate": 0.00019999999999999998,
+      "loss": 0.0325,
+      "step": 200
+    },
+    {
+      "epoch": 16.33,
+      "eval_loss": 0.28704825043678284,
+      "eval_runtime": 89.7951,
+      "eval_samples_per_second": 4.31,
+      "eval_steps_per_second": 0.546,
+      "step": 200
+    },
+    {
+      "epoch": 17.14,
+      "grad_norm": 0.13403691351413727,
+      "learning_rate": 0.00018999999999999998,
+      "loss": 0.0297,
+      "step": 210
+    },
+    {
+      "epoch": 17.96,
+      "grad_norm": 0.14512716233730316,
+      "learning_rate": 0.00017999999999999998,
+      "loss": 0.0279,
+      "step": 220
+    },
+    {
+      "epoch": 17.96,
+      "eval_loss": 0.2964874505996704,
+      "eval_runtime": 89.7009,
+      "eval_samples_per_second": 4.314,
+      "eval_steps_per_second": 0.546,
+      "step": 220
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 400,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 34,
+  "save_steps": 20,
+  "total_flos": 1.262300164534567e+18,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-220/training_args.bin b/checkpoint-220/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3f0d01d8ba12c6a725736cdf727e72ec03ea9a4f
--- /dev/null
+++ b/checkpoint-220/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:01be3c1366faeea704d7b18d02c117abdc170d0c96565a08a0f3ad9c5e7a123a
+size 4856
diff --git a/checkpoint-240/README.md b/checkpoint-240/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..4b6f787248182d62a16f6423f948c336352c3674
--- /dev/null
+++ b/checkpoint-240/README.md
@@ -0,0 +1,204 @@
+---
+library_name: peft
+base_model: bigcode/starcoder
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure 
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+
+
+### Framework versions
+
+- PEFT 0.8.2
\ No newline at end of file
diff --git a/checkpoint-240/adapter_config.json b/checkpoint-240/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..ab6c617ff1322585052700834abbc593bae7c619
--- /dev/null
+++ b/checkpoint-240/adapter_config.json
@@ -0,0 +1,28 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "bigcode/starcoder",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "c_proj",
+    "c_attn",
+    "q_attn"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-240/adapter_model.safetensors b/checkpoint-240/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75
--- /dev/null
+++ b/checkpoint-240/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41
+size 48
diff --git a/checkpoint-240/optimizer.pt b/checkpoint-240/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2d6334077859fcf334b9215fc684bc57fcfc7ccb
--- /dev/null
+++ b/checkpoint-240/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:300e34361117aac8ed136e5d7a7d3ae6b8594ef84ea1cb9a4df35b4290be6988
+size 284628602
diff --git a/checkpoint-240/rng_state.pth b/checkpoint-240/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..c2527708f69e3183138740d2549c7d1dcfe140f7
--- /dev/null
+++ b/checkpoint-240/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d95dcc3c79699f6cd0d3a5165464d687a39bc265545d10ef96194e4b4411d702
+size 14244
diff --git a/checkpoint-240/scheduler.pt b/checkpoint-240/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cd718e258bc8056a6e0a813b8d692817c2f44439
--- /dev/null
+++ b/checkpoint-240/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:35029adf67ec7b6288431b13620690917e0d68e95151a8d732f3eb783ef0c816
+size 1064
diff --git a/checkpoint-240/trainer_state.json b/checkpoint-240/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..08e314dda5bac2a9ecd3144645506cdea42c3f14
--- /dev/null
+++ b/checkpoint-240/trainer_state.json
@@ -0,0 +1,285 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 19.591836734693878,
+  "eval_steps": 20,
+  "global_step": 240,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.82,
+      "grad_norm": 0.18377472460269928,
+      "learning_rate": 2.9999999999999997e-05,
+      "loss": 1.861,
+      "step": 10
+    },
+    {
+      "epoch": 1.63,
+      "grad_norm": 0.35202744603157043,
+      "learning_rate": 5.9999999999999995e-05,
+      "loss": 1.7263,
+      "step": 20
+    },
+    {
+      "epoch": 1.63,
+      "eval_loss": 1.4457755088806152,
+      "eval_runtime": 89.8938,
+      "eval_samples_per_second": 4.305,
+      "eval_steps_per_second": 0.545,
+      "step": 20
+    },
+    {
+      "epoch": 2.45,
+      "grad_norm": 0.928983747959137,
+      "learning_rate": 8.999999999999999e-05,
+      "loss": 1.1718,
+      "step": 30
+    },
+    {
+      "epoch": 3.27,
+      "grad_norm": 0.253262996673584,
+      "learning_rate": 0.00011999999999999999,
+      "loss": 0.4789,
+      "step": 40
+    },
+    {
+      "epoch": 3.27,
+      "eval_loss": 0.3332095146179199,
+      "eval_runtime": 89.9804,
+      "eval_samples_per_second": 4.301,
+      "eval_steps_per_second": 0.545,
+      "step": 40
+    },
+    {
+      "epoch": 4.08,
+      "grad_norm": 0.12236642092466354,
+      "learning_rate": 0.00015,
+      "loss": 0.3568,
+      "step": 50
+    },
+    {
+      "epoch": 4.9,
+      "grad_norm": 0.09160923212766647,
+      "learning_rate": 0.00017999999999999998,
+      "loss": 0.3256,
+      "step": 60
+    },
+    {
+      "epoch": 4.9,
+      "eval_loss": 0.2753114104270935,
+      "eval_runtime": 90.3206,
+      "eval_samples_per_second": 4.285,
+      "eval_steps_per_second": 0.543,
+      "step": 60
+    },
+    {
+      "epoch": 5.71,
+      "grad_norm": 0.10242326557636261,
+      "learning_rate": 0.00020999999999999998,
+      "loss": 0.2841,
+      "step": 70
+    },
+    {
+      "epoch": 6.53,
+      "grad_norm": 0.1305350810289383,
+      "learning_rate": 0.00023999999999999998,
+      "loss": 0.2615,
+      "step": 80
+    },
+    {
+      "epoch": 6.53,
+      "eval_loss": 0.2476309835910797,
+      "eval_runtime": 90.525,
+      "eval_samples_per_second": 4.275,
+      "eval_steps_per_second": 0.541,
+      "step": 80
+    },
+    {
+      "epoch": 7.35,
+      "grad_norm": 0.17941106855869293,
+      "learning_rate": 0.00027,
+      "loss": 0.2216,
+      "step": 90
+    },
+    {
+      "epoch": 8.16,
+      "grad_norm": 0.20095375180244446,
+      "learning_rate": 0.0003,
+      "loss": 0.1832,
+      "step": 100
+    },
+    {
+      "epoch": 8.16,
+      "eval_loss": 0.2350914180278778,
+      "eval_runtime": 90.3919,
+      "eval_samples_per_second": 4.281,
+      "eval_steps_per_second": 0.542,
+      "step": 100
+    },
+    {
+      "epoch": 8.98,
+      "grad_norm": 0.2600422501564026,
+      "learning_rate": 0.00029,
+      "loss": 0.1441,
+      "step": 110
+    },
+    {
+      "epoch": 9.8,
+      "grad_norm": 0.20544037222862244,
+      "learning_rate": 0.00028,
+      "loss": 0.1186,
+      "step": 120
+    },
+    {
+      "epoch": 9.8,
+      "eval_loss": 0.23090216517448425,
+      "eval_runtime": 90.3144,
+      "eval_samples_per_second": 4.285,
+      "eval_steps_per_second": 0.543,
+      "step": 120
+    },
+    {
+      "epoch": 10.61,
+      "grad_norm": 0.2158157229423523,
+      "learning_rate": 0.00027,
+      "loss": 0.0947,
+      "step": 130
+    },
+    {
+      "epoch": 11.43,
+      "grad_norm": 0.18916285037994385,
+      "learning_rate": 0.00026,
+      "loss": 0.0768,
+      "step": 140
+    },
+    {
+      "epoch": 11.43,
+      "eval_loss": 0.24214179813861847,
+      "eval_runtime": 90.2597,
+      "eval_samples_per_second": 4.288,
+      "eval_steps_per_second": 0.543,
+      "step": 140
+    },
+    {
+      "epoch": 12.24,
+      "grad_norm": 0.22263498604297638,
+      "learning_rate": 0.00025,
+      "loss": 0.0615,
+      "step": 150
+    },
+    {
+      "epoch": 13.06,
+      "grad_norm": 0.21315976977348328,
+      "learning_rate": 0.00023999999999999998,
+      "loss": 0.054,
+      "step": 160
+    },
+    {
+      "epoch": 13.06,
+      "eval_loss": 0.25932466983795166,
+      "eval_runtime": 89.8439,
+      "eval_samples_per_second": 4.307,
+      "eval_steps_per_second": 0.545,
+      "step": 160
+    },
+    {
+      "epoch": 13.88,
+      "grad_norm": 0.18338361382484436,
+      "learning_rate": 0.00023,
+      "loss": 0.0455,
+      "step": 170
+    },
+    {
+      "epoch": 14.69,
+      "grad_norm": 0.17157459259033203,
+      "learning_rate": 0.00021999999999999995,
+      "loss": 0.0393,
+      "step": 180
+    },
+    {
+      "epoch": 14.69,
+      "eval_loss": 0.27233538031578064,
+      "eval_runtime": 90.1364,
+      "eval_samples_per_second": 4.293,
+      "eval_steps_per_second": 0.544,
+      "step": 180
+    },
+    {
+      "epoch": 15.51,
+      "grad_norm": 0.1541435867547989,
+      "learning_rate": 0.00020999999999999998,
+      "loss": 0.0352,
+      "step": 190
+    },
+    {
+      "epoch": 16.33,
+      "grad_norm": 0.1553652435541153,
+      "learning_rate": 0.00019999999999999998,
+      "loss": 0.0325,
+      "step": 200
+    },
+    {
+      "epoch": 16.33,
+      "eval_loss": 0.28704825043678284,
+      "eval_runtime": 89.7951,
+      "eval_samples_per_second": 4.31,
+      "eval_steps_per_second": 0.546,
+      "step": 200
+    },
+    {
+      "epoch": 17.14,
+      "grad_norm": 0.13403691351413727,
+      "learning_rate": 0.00018999999999999998,
+      "loss": 0.0297,
+      "step": 210
+    },
+    {
+      "epoch": 17.96,
+      "grad_norm": 0.14512716233730316,
+      "learning_rate": 0.00017999999999999998,
+      "loss": 0.0279,
+      "step": 220
+    },
+    {
+      "epoch": 17.96,
+      "eval_loss": 0.2964874505996704,
+      "eval_runtime": 89.7009,
+      "eval_samples_per_second": 4.314,
+      "eval_steps_per_second": 0.546,
+      "step": 220
+    },
+    {
+      "epoch": 18.78,
+      "grad_norm": 0.12400835007429123,
+      "learning_rate": 0.00016999999999999999,
+      "loss": 0.0263,
+      "step": 230
+    },
+    {
+      "epoch": 19.59,
+      "grad_norm": 0.1139909029006958,
+      "learning_rate": 0.00015999999999999999,
+      "loss": 0.0246,
+      "step": 240
+    },
+    {
+      "epoch": 19.59,
+      "eval_loss": 0.30519917607307434,
+      "eval_runtime": 89.8387,
+      "eval_samples_per_second": 4.308,
+      "eval_steps_per_second": 0.545,
+      "step": 240
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 400,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 34,
+  "save_steps": 20,
+  "total_flos": 1.3766132306332877e+18,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-240/training_args.bin b/checkpoint-240/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3f0d01d8ba12c6a725736cdf727e72ec03ea9a4f
--- /dev/null
+++ b/checkpoint-240/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:01be3c1366faeea704d7b18d02c117abdc170d0c96565a08a0f3ad9c5e7a123a
+size 4856
diff --git a/checkpoint-260/README.md b/checkpoint-260/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..4b6f787248182d62a16f6423f948c336352c3674
--- /dev/null
+++ b/checkpoint-260/README.md
@@ -0,0 +1,204 @@
+---
+library_name: peft
+base_model: bigcode/starcoder
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure 
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+
+
+### Framework versions
+
+- PEFT 0.8.2
\ No newline at end of file
diff --git a/checkpoint-260/adapter_config.json b/checkpoint-260/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..ab6c617ff1322585052700834abbc593bae7c619
--- /dev/null
+++ b/checkpoint-260/adapter_config.json
@@ -0,0 +1,28 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "bigcode/starcoder",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "c_proj",
+    "c_attn",
+    "q_attn"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-260/adapter_model.safetensors b/checkpoint-260/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75
--- /dev/null
+++ b/checkpoint-260/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41
+size 48
diff --git a/checkpoint-260/optimizer.pt b/checkpoint-260/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9ebc0b95b26ccd025f46abeb9451f05222969317
--- /dev/null
+++ b/checkpoint-260/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ce9fe32629270fa64ee19a35edef93a0e79ff41115edd10fb6e268a6676e09c1
+size 284628602
diff --git a/checkpoint-260/rng_state.pth b/checkpoint-260/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..5116ad0b14fcec210ce4114fc4bfdfbc5c4bc4bf
--- /dev/null
+++ b/checkpoint-260/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e089d826e2d134b035865eb0da40dc9031b0478f13706ba2458c1d7e22fc5747
+size 14244
diff --git a/checkpoint-260/scheduler.pt b/checkpoint-260/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7dc1cbf308c73ec7f2c8ba6761bfa8c3cb511190
--- /dev/null
+++ b/checkpoint-260/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:618426a7b0745c9d6476d3c7a67a48976de53907877b0f786d6ae0b3e2e942da
+size 1064
diff --git a/checkpoint-260/trainer_state.json b/checkpoint-260/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..a933bc70843aa7c572975e3f5129556bf5819742
--- /dev/null
+++ b/checkpoint-260/trainer_state.json
@@ -0,0 +1,307 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 21.224489795918366,
+  "eval_steps": 20,
+  "global_step": 260,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.82,
+      "grad_norm": 0.18377472460269928,
+      "learning_rate": 2.9999999999999997e-05,
+      "loss": 1.861,
+      "step": 10
+    },
+    {
+      "epoch": 1.63,
+      "grad_norm": 0.35202744603157043,
+      "learning_rate": 5.9999999999999995e-05,
+      "loss": 1.7263,
+      "step": 20
+    },
+    {
+      "epoch": 1.63,
+      "eval_loss": 1.4457755088806152,
+      "eval_runtime": 89.8938,
+      "eval_samples_per_second": 4.305,
+      "eval_steps_per_second": 0.545,
+      "step": 20
+    },
+    {
+      "epoch": 2.45,
+      "grad_norm": 0.928983747959137,
+      "learning_rate": 8.999999999999999e-05,
+      "loss": 1.1718,
+      "step": 30
+    },
+    {
+      "epoch": 3.27,
+      "grad_norm": 0.253262996673584,
+      "learning_rate": 0.00011999999999999999,
+      "loss": 0.4789,
+      "step": 40
+    },
+    {
+      "epoch": 3.27,
+      "eval_loss": 0.3332095146179199,
+      "eval_runtime": 89.9804,
+      "eval_samples_per_second": 4.301,
+      "eval_steps_per_second": 0.545,
+      "step": 40
+    },
+    {
+      "epoch": 4.08,
+      "grad_norm": 0.12236642092466354,
+      "learning_rate": 0.00015,
+      "loss": 0.3568,
+      "step": 50
+    },
+    {
+      "epoch": 4.9,
+      "grad_norm": 0.09160923212766647,
+      "learning_rate": 0.00017999999999999998,
+      "loss": 0.3256,
+      "step": 60
+    },
+    {
+      "epoch": 4.9,
+      "eval_loss": 0.2753114104270935,
+      "eval_runtime": 90.3206,
+      "eval_samples_per_second": 4.285,
+      "eval_steps_per_second": 0.543,
+      "step": 60
+    },
+    {
+      "epoch": 5.71,
+      "grad_norm": 0.10242326557636261,
+      "learning_rate": 0.00020999999999999998,
+      "loss": 0.2841,
+      "step": 70
+    },
+    {
+      "epoch": 6.53,
+      "grad_norm": 0.1305350810289383,
+      "learning_rate": 0.00023999999999999998,
+      "loss": 0.2615,
+      "step": 80
+    },
+    {
+      "epoch": 6.53,
+      "eval_loss": 0.2476309835910797,
+      "eval_runtime": 90.525,
+      "eval_samples_per_second": 4.275,
+      "eval_steps_per_second": 0.541,
+      "step": 80
+    },
+    {
+      "epoch": 7.35,
+      "grad_norm": 0.17941106855869293,
+      "learning_rate": 0.00027,
+      "loss": 0.2216,
+      "step": 90
+    },
+    {
+      "epoch": 8.16,
+      "grad_norm": 0.20095375180244446,
+      "learning_rate": 0.0003,
+      "loss": 0.1832,
+      "step": 100
+    },
+    {
+      "epoch": 8.16,
+      "eval_loss": 0.2350914180278778,
+      "eval_runtime": 90.3919,
+      "eval_samples_per_second": 4.281,
+      "eval_steps_per_second": 0.542,
+      "step": 100
+    },
+    {
+      "epoch": 8.98,
+      "grad_norm": 0.2600422501564026,
+      "learning_rate": 0.00029,
+      "loss": 0.1441,
+      "step": 110
+    },
+    {
+      "epoch": 9.8,
+      "grad_norm": 0.20544037222862244,
+      "learning_rate": 0.00028,
+      "loss": 0.1186,
+      "step": 120
+    },
+    {
+      "epoch": 9.8,
+      "eval_loss": 0.23090216517448425,
+      "eval_runtime": 90.3144,
+      "eval_samples_per_second": 4.285,
+      "eval_steps_per_second": 0.543,
+      "step": 120
+    },
+    {
+      "epoch": 10.61,
+      "grad_norm": 0.2158157229423523,
+      "learning_rate": 0.00027,
+      "loss": 0.0947,
+      "step": 130
+    },
+    {
+      "epoch": 11.43,
+      "grad_norm": 0.18916285037994385,
+      "learning_rate": 0.00026,
+      "loss": 0.0768,
+      "step": 140
+    },
+    {
+      "epoch": 11.43,
+      "eval_loss": 0.24214179813861847,
+      "eval_runtime": 90.2597,
+      "eval_samples_per_second": 4.288,
+      "eval_steps_per_second": 0.543,
+      "step": 140
+    },
+    {
+      "epoch": 12.24,
+      "grad_norm": 0.22263498604297638,
+      "learning_rate": 0.00025,
+      "loss": 0.0615,
+      "step": 150
+    },
+    {
+      "epoch": 13.06,
+      "grad_norm": 0.21315976977348328,
+      "learning_rate": 0.00023999999999999998,
+      "loss": 0.054,
+      "step": 160
+    },
+    {
+      "epoch": 13.06,
+      "eval_loss": 0.25932466983795166,
+      "eval_runtime": 89.8439,
+      "eval_samples_per_second": 4.307,
+      "eval_steps_per_second": 0.545,
+      "step": 160
+    },
+    {
+      "epoch": 13.88,
+      "grad_norm": 0.18338361382484436,
+      "learning_rate": 0.00023,
+      "loss": 0.0455,
+      "step": 170
+    },
+    {
+      "epoch": 14.69,
+      "grad_norm": 0.17157459259033203,
+      "learning_rate": 0.00021999999999999995,
+      "loss": 0.0393,
+      "step": 180
+    },
+    {
+      "epoch": 14.69,
+      "eval_loss": 0.27233538031578064,
+      "eval_runtime": 90.1364,
+      "eval_samples_per_second": 4.293,
+      "eval_steps_per_second": 0.544,
+      "step": 180
+    },
+    {
+      "epoch": 15.51,
+      "grad_norm": 0.1541435867547989,
+      "learning_rate": 0.00020999999999999998,
+      "loss": 0.0352,
+      "step": 190
+    },
+    {
+      "epoch": 16.33,
+      "grad_norm": 0.1553652435541153,
+      "learning_rate": 0.00019999999999999998,
+      "loss": 0.0325,
+      "step": 200
+    },
+    {
+      "epoch": 16.33,
+      "eval_loss": 0.28704825043678284,
+      "eval_runtime": 89.7951,
+      "eval_samples_per_second": 4.31,
+      "eval_steps_per_second": 0.546,
+      "step": 200
+    },
+    {
+      "epoch": 17.14,
+      "grad_norm": 0.13403691351413727,
+      "learning_rate": 0.00018999999999999998,
+      "loss": 0.0297,
+      "step": 210
+    },
+    {
+      "epoch": 17.96,
+      "grad_norm": 0.14512716233730316,
+      "learning_rate": 0.00017999999999999998,
+      "loss": 0.0279,
+      "step": 220
+    },
+    {
+      "epoch": 17.96,
+      "eval_loss": 0.2964874505996704,
+      "eval_runtime": 89.7009,
+      "eval_samples_per_second": 4.314,
+      "eval_steps_per_second": 0.546,
+      "step": 220
+    },
+    {
+      "epoch": 18.78,
+      "grad_norm": 0.12400835007429123,
+      "learning_rate": 0.00016999999999999999,
+      "loss": 0.0263,
+      "step": 230
+    },
+    {
+      "epoch": 19.59,
+      "grad_norm": 0.1139909029006958,
+      "learning_rate": 0.00015999999999999999,
+      "loss": 0.0246,
+      "step": 240
+    },
+    {
+      "epoch": 19.59,
+      "eval_loss": 0.30519917607307434,
+      "eval_runtime": 89.8387,
+      "eval_samples_per_second": 4.308,
+      "eval_steps_per_second": 0.545,
+      "step": 240
+    },
+    {
+      "epoch": 20.41,
+      "grad_norm": 0.12317101657390594,
+      "learning_rate": 0.00015,
+      "loss": 0.0235,
+      "step": 250
+    },
+    {
+      "epoch": 21.22,
+      "grad_norm": 0.12494686245918274,
+      "learning_rate": 0.00014,
+      "loss": 0.0224,
+      "step": 260
+    },
+    {
+      "epoch": 21.22,
+      "eval_loss": 0.314134418964386,
+      "eval_runtime": 89.7974,
+      "eval_samples_per_second": 4.31,
+      "eval_steps_per_second": 0.546,
+      "step": 260
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 400,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 34,
+  "save_steps": 20,
+  "total_flos": 1.49113643104469e+18,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-260/training_args.bin b/checkpoint-260/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3f0d01d8ba12c6a725736cdf727e72ec03ea9a4f
--- /dev/null
+++ b/checkpoint-260/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:01be3c1366faeea704d7b18d02c117abdc170d0c96565a08a0f3ad9c5e7a123a
+size 4856
diff --git a/checkpoint-280/README.md b/checkpoint-280/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..4b6f787248182d62a16f6423f948c336352c3674
--- /dev/null
+++ b/checkpoint-280/README.md
@@ -0,0 +1,204 @@
+---
+library_name: peft
+base_model: bigcode/starcoder
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure 
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+
+
+### Framework versions
+
+- PEFT 0.8.2
\ No newline at end of file
diff --git a/checkpoint-280/adapter_config.json b/checkpoint-280/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..ab6c617ff1322585052700834abbc593bae7c619
--- /dev/null
+++ b/checkpoint-280/adapter_config.json
@@ -0,0 +1,28 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "bigcode/starcoder",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "c_proj",
+    "c_attn",
+    "q_attn"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-280/adapter_model.safetensors b/checkpoint-280/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75
--- /dev/null
+++ b/checkpoint-280/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41
+size 48
diff --git a/checkpoint-280/optimizer.pt b/checkpoint-280/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..68e3782ce4c0252f68f4a8ba061748447b84c9d9
--- /dev/null
+++ b/checkpoint-280/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:01fcc6ba5ccd409790320e8a4ac2e3007d82fa6d14e8f05f6c3933b60a6aaa95
+size 284628602
diff --git a/checkpoint-280/rng_state.pth b/checkpoint-280/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..f6246157c94c5db7a176f0945a202ae2b9934970
--- /dev/null
+++ b/checkpoint-280/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2316503a65fe2763f2ee91e2cd06e9da95858ca7b41cee85369a45dab120c2ba
+size 14244
diff --git a/checkpoint-280/scheduler.pt b/checkpoint-280/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a2eea9aa1d6851803e5b4cf6bad706f10867b92a
--- /dev/null
+++ b/checkpoint-280/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:326b29a713a65b60d1cdb16e40a0ac0c9e0bc23145aea4ec7d4e2e525de2f511
+size 1064
diff --git a/checkpoint-280/trainer_state.json b/checkpoint-280/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..647f08f7c77603bc0cd1b7738aa733cc5421d91d
--- /dev/null
+++ b/checkpoint-280/trainer_state.json
@@ -0,0 +1,329 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 22.857142857142858,
+  "eval_steps": 20,
+  "global_step": 280,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.82,
+      "grad_norm": 0.18377472460269928,
+      "learning_rate": 2.9999999999999997e-05,
+      "loss": 1.861,
+      "step": 10
+    },
+    {
+      "epoch": 1.63,
+      "grad_norm": 0.35202744603157043,
+      "learning_rate": 5.9999999999999995e-05,
+      "loss": 1.7263,
+      "step": 20
+    },
+    {
+      "epoch": 1.63,
+      "eval_loss": 1.4457755088806152,
+      "eval_runtime": 89.8938,
+      "eval_samples_per_second": 4.305,
+      "eval_steps_per_second": 0.545,
+      "step": 20
+    },
+    {
+      "epoch": 2.45,
+      "grad_norm": 0.928983747959137,
+      "learning_rate": 8.999999999999999e-05,
+      "loss": 1.1718,
+      "step": 30
+    },
+    {
+      "epoch": 3.27,
+      "grad_norm": 0.253262996673584,
+      "learning_rate": 0.00011999999999999999,
+      "loss": 0.4789,
+      "step": 40
+    },
+    {
+      "epoch": 3.27,
+      "eval_loss": 0.3332095146179199,
+      "eval_runtime": 89.9804,
+      "eval_samples_per_second": 4.301,
+      "eval_steps_per_second": 0.545,
+      "step": 40
+    },
+    {
+      "epoch": 4.08,
+      "grad_norm": 0.12236642092466354,
+      "learning_rate": 0.00015,
+      "loss": 0.3568,
+      "step": 50
+    },
+    {
+      "epoch": 4.9,
+      "grad_norm": 0.09160923212766647,
+      "learning_rate": 0.00017999999999999998,
+      "loss": 0.3256,
+      "step": 60
+    },
+    {
+      "epoch": 4.9,
+      "eval_loss": 0.2753114104270935,
+      "eval_runtime": 90.3206,
+      "eval_samples_per_second": 4.285,
+      "eval_steps_per_second": 0.543,
+      "step": 60
+    },
+    {
+      "epoch": 5.71,
+      "grad_norm": 0.10242326557636261,
+      "learning_rate": 0.00020999999999999998,
+      "loss": 0.2841,
+      "step": 70
+    },
+    {
+      "epoch": 6.53,
+      "grad_norm": 0.1305350810289383,
+      "learning_rate": 0.00023999999999999998,
+      "loss": 0.2615,
+      "step": 80
+    },
+    {
+      "epoch": 6.53,
+      "eval_loss": 0.2476309835910797,
+      "eval_runtime": 90.525,
+      "eval_samples_per_second": 4.275,
+      "eval_steps_per_second": 0.541,
+      "step": 80
+    },
+    {
+      "epoch": 7.35,
+      "grad_norm": 0.17941106855869293,
+      "learning_rate": 0.00027,
+      "loss": 0.2216,
+      "step": 90
+    },
+    {
+      "epoch": 8.16,
+      "grad_norm": 0.20095375180244446,
+      "learning_rate": 0.0003,
+      "loss": 0.1832,
+      "step": 100
+    },
+    {
+      "epoch": 8.16,
+      "eval_loss": 0.2350914180278778,
+      "eval_runtime": 90.3919,
+      "eval_samples_per_second": 4.281,
+      "eval_steps_per_second": 0.542,
+      "step": 100
+    },
+    {
+      "epoch": 8.98,
+      "grad_norm": 0.2600422501564026,
+      "learning_rate": 0.00029,
+      "loss": 0.1441,
+      "step": 110
+    },
+    {
+      "epoch": 9.8,
+      "grad_norm": 0.20544037222862244,
+      "learning_rate": 0.00028,
+      "loss": 0.1186,
+      "step": 120
+    },
+    {
+      "epoch": 9.8,
+      "eval_loss": 0.23090216517448425,
+      "eval_runtime": 90.3144,
+      "eval_samples_per_second": 4.285,
+      "eval_steps_per_second": 0.543,
+      "step": 120
+    },
+    {
+      "epoch": 10.61,
+      "grad_norm": 0.2158157229423523,
+      "learning_rate": 0.00027,
+      "loss": 0.0947,
+      "step": 130
+    },
+    {
+      "epoch": 11.43,
+      "grad_norm": 0.18916285037994385,
+      "learning_rate": 0.00026,
+      "loss": 0.0768,
+      "step": 140
+    },
+    {
+      "epoch": 11.43,
+      "eval_loss": 0.24214179813861847,
+      "eval_runtime": 90.2597,
+      "eval_samples_per_second": 4.288,
+      "eval_steps_per_second": 0.543,
+      "step": 140
+    },
+    {
+      "epoch": 12.24,
+      "grad_norm": 0.22263498604297638,
+      "learning_rate": 0.00025,
+      "loss": 0.0615,
+      "step": 150
+    },
+    {
+      "epoch": 13.06,
+      "grad_norm": 0.21315976977348328,
+      "learning_rate": 0.00023999999999999998,
+      "loss": 0.054,
+      "step": 160
+    },
+    {
+      "epoch": 13.06,
+      "eval_loss": 0.25932466983795166,
+      "eval_runtime": 89.8439,
+      "eval_samples_per_second": 4.307,
+      "eval_steps_per_second": 0.545,
+      "step": 160
+    },
+    {
+      "epoch": 13.88,
+      "grad_norm": 0.18338361382484436,
+      "learning_rate": 0.00023,
+      "loss": 0.0455,
+      "step": 170
+    },
+    {
+      "epoch": 14.69,
+      "grad_norm": 0.17157459259033203,
+      "learning_rate": 0.00021999999999999995,
+      "loss": 0.0393,
+      "step": 180
+    },
+    {
+      "epoch": 14.69,
+      "eval_loss": 0.27233538031578064,
+      "eval_runtime": 90.1364,
+      "eval_samples_per_second": 4.293,
+      "eval_steps_per_second": 0.544,
+      "step": 180
+    },
+    {
+      "epoch": 15.51,
+      "grad_norm": 0.1541435867547989,
+      "learning_rate": 0.00020999999999999998,
+      "loss": 0.0352,
+      "step": 190
+    },
+    {
+      "epoch": 16.33,
+      "grad_norm": 0.1553652435541153,
+      "learning_rate": 0.00019999999999999998,
+      "loss": 0.0325,
+      "step": 200
+    },
+    {
+      "epoch": 16.33,
+      "eval_loss": 0.28704825043678284,
+      "eval_runtime": 89.7951,
+      "eval_samples_per_second": 4.31,
+      "eval_steps_per_second": 0.546,
+      "step": 200
+    },
+    {
+      "epoch": 17.14,
+      "grad_norm": 0.13403691351413727,
+      "learning_rate": 0.00018999999999999998,
+      "loss": 0.0297,
+      "step": 210
+    },
+    {
+      "epoch": 17.96,
+      "grad_norm": 0.14512716233730316,
+      "learning_rate": 0.00017999999999999998,
+      "loss": 0.0279,
+      "step": 220
+    },
+    {
+      "epoch": 17.96,
+      "eval_loss": 0.2964874505996704,
+      "eval_runtime": 89.7009,
+      "eval_samples_per_second": 4.314,
+      "eval_steps_per_second": 0.546,
+      "step": 220
+    },
+    {
+      "epoch": 18.78,
+      "grad_norm": 0.12400835007429123,
+      "learning_rate": 0.00016999999999999999,
+      "loss": 0.0263,
+      "step": 230
+    },
+    {
+      "epoch": 19.59,
+      "grad_norm": 0.1139909029006958,
+      "learning_rate": 0.00015999999999999999,
+      "loss": 0.0246,
+      "step": 240
+    },
+    {
+      "epoch": 19.59,
+      "eval_loss": 0.30519917607307434,
+      "eval_runtime": 89.8387,
+      "eval_samples_per_second": 4.308,
+      "eval_steps_per_second": 0.545,
+      "step": 240
+    },
+    {
+      "epoch": 20.41,
+      "grad_norm": 0.12317101657390594,
+      "learning_rate": 0.00015,
+      "loss": 0.0235,
+      "step": 250
+    },
+    {
+      "epoch": 21.22,
+      "grad_norm": 0.12494686245918274,
+      "learning_rate": 0.00014,
+      "loss": 0.0224,
+      "step": 260
+    },
+    {
+      "epoch": 21.22,
+      "eval_loss": 0.314134418964386,
+      "eval_runtime": 89.7974,
+      "eval_samples_per_second": 4.31,
+      "eval_steps_per_second": 0.546,
+      "step": 260
+    },
+    {
+      "epoch": 22.04,
+      "grad_norm": 0.1180659756064415,
+      "learning_rate": 0.00013,
+      "loss": 0.022,
+      "step": 270
+    },
+    {
+      "epoch": 22.86,
+      "grad_norm": 0.09653373062610626,
+      "learning_rate": 0.00011999999999999999,
+      "loss": 0.0212,
+      "step": 280
+    },
+    {
+      "epoch": 22.86,
+      "eval_loss": 0.3175604045391083,
+      "eval_runtime": 89.9764,
+      "eval_samples_per_second": 4.301,
+      "eval_steps_per_second": 0.545,
+      "step": 280
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 400,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 34,
+  "save_steps": 20,
+  "total_flos": 1.6064067756789596e+18,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-280/training_args.bin b/checkpoint-280/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3f0d01d8ba12c6a725736cdf727e72ec03ea9a4f
--- /dev/null
+++ b/checkpoint-280/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:01be3c1366faeea704d7b18d02c117abdc170d0c96565a08a0f3ad9c5e7a123a
+size 4856
diff --git a/checkpoint-300/README.md b/checkpoint-300/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..4b6f787248182d62a16f6423f948c336352c3674
--- /dev/null
+++ b/checkpoint-300/README.md
@@ -0,0 +1,204 @@
+---
+library_name: peft
+base_model: bigcode/starcoder
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure 
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+
+
+### Framework versions
+
+- PEFT 0.8.2
\ No newline at end of file
diff --git a/checkpoint-300/adapter_config.json b/checkpoint-300/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..ab6c617ff1322585052700834abbc593bae7c619
--- /dev/null
+++ b/checkpoint-300/adapter_config.json
@@ -0,0 +1,28 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "bigcode/starcoder",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "c_proj",
+    "c_attn",
+    "q_attn"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-300/adapter_model.safetensors b/checkpoint-300/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75
--- /dev/null
+++ b/checkpoint-300/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41
+size 48
diff --git a/checkpoint-300/optimizer.pt b/checkpoint-300/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8f3a17832fe7ab770b972fb49d3aa68c1e4d05f7
--- /dev/null
+++ b/checkpoint-300/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f95d4f3b0790d8b9f70228f751a90a874bd1c329b0279d0d4f356aa76817469
+size 284628602
diff --git a/checkpoint-300/rng_state.pth b/checkpoint-300/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a9b112642217f03203385840075fb1fa9af37a4e
--- /dev/null
+++ b/checkpoint-300/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:56ac9836af59089a7eed0a7a9a522b10102e0f768a815b08c8934678c7d8f27a
+size 14244
diff --git a/checkpoint-300/scheduler.pt b/checkpoint-300/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..043065694100117ec9ec34d87d4119760c55d60e
--- /dev/null
+++ b/checkpoint-300/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:02a846f1b53d06b3a5385e00f3c382802f5449c10c5faa9bc984677d196316c2
+size 1064
diff --git a/checkpoint-300/trainer_state.json b/checkpoint-300/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..7de1880ad5ba7edd4c41f963df0bc5ce6554339b
--- /dev/null
+++ b/checkpoint-300/trainer_state.json
@@ -0,0 +1,351 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 24.489795918367346,
+  "eval_steps": 20,
+  "global_step": 300,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.82,
+      "grad_norm": 0.18377472460269928,
+      "learning_rate": 2.9999999999999997e-05,
+      "loss": 1.861,
+      "step": 10
+    },
+    {
+      "epoch": 1.63,
+      "grad_norm": 0.35202744603157043,
+      "learning_rate": 5.9999999999999995e-05,
+      "loss": 1.7263,
+      "step": 20
+    },
+    {
+      "epoch": 1.63,
+      "eval_loss": 1.4457755088806152,
+      "eval_runtime": 89.8938,
+      "eval_samples_per_second": 4.305,
+      "eval_steps_per_second": 0.545,
+      "step": 20
+    },
+    {
+      "epoch": 2.45,
+      "grad_norm": 0.928983747959137,
+      "learning_rate": 8.999999999999999e-05,
+      "loss": 1.1718,
+      "step": 30
+    },
+    {
+      "epoch": 3.27,
+      "grad_norm": 0.253262996673584,
+      "learning_rate": 0.00011999999999999999,
+      "loss": 0.4789,
+      "step": 40
+    },
+    {
+      "epoch": 3.27,
+      "eval_loss": 0.3332095146179199,
+      "eval_runtime": 89.9804,
+      "eval_samples_per_second": 4.301,
+      "eval_steps_per_second": 0.545,
+      "step": 40
+    },
+    {
+      "epoch": 4.08,
+      "grad_norm": 0.12236642092466354,
+      "learning_rate": 0.00015,
+      "loss": 0.3568,
+      "step": 50
+    },
+    {
+      "epoch": 4.9,
+      "grad_norm": 0.09160923212766647,
+      "learning_rate": 0.00017999999999999998,
+      "loss": 0.3256,
+      "step": 60
+    },
+    {
+      "epoch": 4.9,
+      "eval_loss": 0.2753114104270935,
+      "eval_runtime": 90.3206,
+      "eval_samples_per_second": 4.285,
+      "eval_steps_per_second": 0.543,
+      "step": 60
+    },
+    {
+      "epoch": 5.71,
+      "grad_norm": 0.10242326557636261,
+      "learning_rate": 0.00020999999999999998,
+      "loss": 0.2841,
+      "step": 70
+    },
+    {
+      "epoch": 6.53,
+      "grad_norm": 0.1305350810289383,
+      "learning_rate": 0.00023999999999999998,
+      "loss": 0.2615,
+      "step": 80
+    },
+    {
+      "epoch": 6.53,
+      "eval_loss": 0.2476309835910797,
+      "eval_runtime": 90.525,
+      "eval_samples_per_second": 4.275,
+      "eval_steps_per_second": 0.541,
+      "step": 80
+    },
+    {
+      "epoch": 7.35,
+      "grad_norm": 0.17941106855869293,
+      "learning_rate": 0.00027,
+      "loss": 0.2216,
+      "step": 90
+    },
+    {
+      "epoch": 8.16,
+      "grad_norm": 0.20095375180244446,
+      "learning_rate": 0.0003,
+      "loss": 0.1832,
+      "step": 100
+    },
+    {
+      "epoch": 8.16,
+      "eval_loss": 0.2350914180278778,
+      "eval_runtime": 90.3919,
+      "eval_samples_per_second": 4.281,
+      "eval_steps_per_second": 0.542,
+      "step": 100
+    },
+    {
+      "epoch": 8.98,
+      "grad_norm": 0.2600422501564026,
+      "learning_rate": 0.00029,
+      "loss": 0.1441,
+      "step": 110
+    },
+    {
+      "epoch": 9.8,
+      "grad_norm": 0.20544037222862244,
+      "learning_rate": 0.00028,
+      "loss": 0.1186,
+      "step": 120
+    },
+    {
+      "epoch": 9.8,
+      "eval_loss": 0.23090216517448425,
+      "eval_runtime": 90.3144,
+      "eval_samples_per_second": 4.285,
+      "eval_steps_per_second": 0.543,
+      "step": 120
+    },
+    {
+      "epoch": 10.61,
+      "grad_norm": 0.2158157229423523,
+      "learning_rate": 0.00027,
+      "loss": 0.0947,
+      "step": 130
+    },
+    {
+      "epoch": 11.43,
+      "grad_norm": 0.18916285037994385,
+      "learning_rate": 0.00026,
+      "loss": 0.0768,
+      "step": 140
+    },
+    {
+      "epoch": 11.43,
+      "eval_loss": 0.24214179813861847,
+      "eval_runtime": 90.2597,
+      "eval_samples_per_second": 4.288,
+      "eval_steps_per_second": 0.543,
+      "step": 140
+    },
+    {
+      "epoch": 12.24,
+      "grad_norm": 0.22263498604297638,
+      "learning_rate": 0.00025,
+      "loss": 0.0615,
+      "step": 150
+    },
+    {
+      "epoch": 13.06,
+      "grad_norm": 0.21315976977348328,
+      "learning_rate": 0.00023999999999999998,
+      "loss": 0.054,
+      "step": 160
+    },
+    {
+      "epoch": 13.06,
+      "eval_loss": 0.25932466983795166,
+      "eval_runtime": 89.8439,
+      "eval_samples_per_second": 4.307,
+      "eval_steps_per_second": 0.545,
+      "step": 160
+    },
+    {
+      "epoch": 13.88,
+      "grad_norm": 0.18338361382484436,
+      "learning_rate": 0.00023,
+      "loss": 0.0455,
+      "step": 170
+    },
+    {
+      "epoch": 14.69,
+      "grad_norm": 0.17157459259033203,
+      "learning_rate": 0.00021999999999999995,
+      "loss": 0.0393,
+      "step": 180
+    },
+    {
+      "epoch": 14.69,
+      "eval_loss": 0.27233538031578064,
+      "eval_runtime": 90.1364,
+      "eval_samples_per_second": 4.293,
+      "eval_steps_per_second": 0.544,
+      "step": 180
+    },
+    {
+      "epoch": 15.51,
+      "grad_norm": 0.1541435867547989,
+      "learning_rate": 0.00020999999999999998,
+      "loss": 0.0352,
+      "step": 190
+    },
+    {
+      "epoch": 16.33,
+      "grad_norm": 0.1553652435541153,
+      "learning_rate": 0.00019999999999999998,
+      "loss": 0.0325,
+      "step": 200
+    },
+    {
+      "epoch": 16.33,
+      "eval_loss": 0.28704825043678284,
+      "eval_runtime": 89.7951,
+      "eval_samples_per_second": 4.31,
+      "eval_steps_per_second": 0.546,
+      "step": 200
+    },
+    {
+      "epoch": 17.14,
+      "grad_norm": 0.13403691351413727,
+      "learning_rate": 0.00018999999999999998,
+      "loss": 0.0297,
+      "step": 210
+    },
+    {
+      "epoch": 17.96,
+      "grad_norm": 0.14512716233730316,
+      "learning_rate": 0.00017999999999999998,
+      "loss": 0.0279,
+      "step": 220
+    },
+    {
+      "epoch": 17.96,
+      "eval_loss": 0.2964874505996704,
+      "eval_runtime": 89.7009,
+      "eval_samples_per_second": 4.314,
+      "eval_steps_per_second": 0.546,
+      "step": 220
+    },
+    {
+      "epoch": 18.78,
+      "grad_norm": 0.12400835007429123,
+      "learning_rate": 0.00016999999999999999,
+      "loss": 0.0263,
+      "step": 230
+    },
+    {
+      "epoch": 19.59,
+      "grad_norm": 0.1139909029006958,
+      "learning_rate": 0.00015999999999999999,
+      "loss": 0.0246,
+      "step": 240
+    },
+    {
+      "epoch": 19.59,
+      "eval_loss": 0.30519917607307434,
+      "eval_runtime": 89.8387,
+      "eval_samples_per_second": 4.308,
+      "eval_steps_per_second": 0.545,
+      "step": 240
+    },
+    {
+      "epoch": 20.41,
+      "grad_norm": 0.12317101657390594,
+      "learning_rate": 0.00015,
+      "loss": 0.0235,
+      "step": 250
+    },
+    {
+      "epoch": 21.22,
+      "grad_norm": 0.12494686245918274,
+      "learning_rate": 0.00014,
+      "loss": 0.0224,
+      "step": 260
+    },
+    {
+      "epoch": 21.22,
+      "eval_loss": 0.314134418964386,
+      "eval_runtime": 89.7974,
+      "eval_samples_per_second": 4.31,
+      "eval_steps_per_second": 0.546,
+      "step": 260
+    },
+    {
+      "epoch": 22.04,
+      "grad_norm": 0.1180659756064415,
+      "learning_rate": 0.00013,
+      "loss": 0.022,
+      "step": 270
+    },
+    {
+      "epoch": 22.86,
+      "grad_norm": 0.09653373062610626,
+      "learning_rate": 0.00011999999999999999,
+      "loss": 0.0212,
+      "step": 280
+    },
+    {
+      "epoch": 22.86,
+      "eval_loss": 0.3175604045391083,
+      "eval_runtime": 89.9764,
+      "eval_samples_per_second": 4.301,
+      "eval_steps_per_second": 0.545,
+      "step": 280
+    },
+    {
+      "epoch": 23.67,
+      "grad_norm": 0.10445748269557953,
+      "learning_rate": 0.00010999999999999998,
+      "loss": 0.0208,
+      "step": 290
+    },
+    {
+      "epoch": 24.49,
+      "grad_norm": 0.09245337545871735,
+      "learning_rate": 9.999999999999999e-05,
+      "loss": 0.0199,
+      "step": 300
+    },
+    {
+      "epoch": 24.49,
+      "eval_loss": 0.32360976934432983,
+      "eval_runtime": 89.8613,
+      "eval_samples_per_second": 4.307,
+      "eval_steps_per_second": 0.545,
+      "step": 300
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 400,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 34,
+  "save_steps": 20,
+  "total_flos": 1.7204863592080343e+18,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-300/training_args.bin b/checkpoint-300/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3f0d01d8ba12c6a725736cdf727e72ec03ea9a4f
--- /dev/null
+++ b/checkpoint-300/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:01be3c1366faeea704d7b18d02c117abdc170d0c96565a08a0f3ad9c5e7a123a
+size 4856
diff --git a/checkpoint-320/README.md b/checkpoint-320/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..4b6f787248182d62a16f6423f948c336352c3674
--- /dev/null
+++ b/checkpoint-320/README.md
@@ -0,0 +1,204 @@
+---
+library_name: peft
+base_model: bigcode/starcoder
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure 
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+
+
+### Framework versions
+
+- PEFT 0.8.2
\ No newline at end of file
diff --git a/checkpoint-320/adapter_config.json b/checkpoint-320/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..ab6c617ff1322585052700834abbc593bae7c619
--- /dev/null
+++ b/checkpoint-320/adapter_config.json
@@ -0,0 +1,28 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "bigcode/starcoder",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "c_proj",
+    "c_attn",
+    "q_attn"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-320/adapter_model.safetensors b/checkpoint-320/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75
--- /dev/null
+++ b/checkpoint-320/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41
+size 48
diff --git a/checkpoint-320/optimizer.pt b/checkpoint-320/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b77f03251156a0c9a7532b815b09a0ebd39f12a9
--- /dev/null
+++ b/checkpoint-320/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9069c1605cf1b3695fbdd5333f7113b9eaf2cb631fb39dd399b73f4e07a90668
+size 284628602
diff --git a/checkpoint-320/rng_state.pth b/checkpoint-320/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..f309811c696f92771c94a54ed5c6590247a29180
--- /dev/null
+++ b/checkpoint-320/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:402c1723d18bbfd6d61b2a49da9b43070c4fa33d72ea1a944508192e5a624a79
+size 14244
diff --git a/checkpoint-320/scheduler.pt b/checkpoint-320/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..44a7c96557d47c43ee760fe5e9084957e7d47bbb
--- /dev/null
+++ b/checkpoint-320/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5c6f90122c7166d476e722a6703ab94e5de6361f1d1a61a844fb6fd64429c85b
+size 1064
diff --git a/checkpoint-320/trainer_state.json b/checkpoint-320/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..354167742886740ba1fcf0005f897a1956639231
--- /dev/null
+++ b/checkpoint-320/trainer_state.json
@@ -0,0 +1,373 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 26.122448979591837,
+  "eval_steps": 20,
+  "global_step": 320,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.82,
+      "grad_norm": 0.18377472460269928,
+      "learning_rate": 2.9999999999999997e-05,
+      "loss": 1.861,
+      "step": 10
+    },
+    {
+      "epoch": 1.63,
+      "grad_norm": 0.35202744603157043,
+      "learning_rate": 5.9999999999999995e-05,
+      "loss": 1.7263,
+      "step": 20
+    },
+    {
+      "epoch": 1.63,
+      "eval_loss": 1.4457755088806152,
+      "eval_runtime": 89.8938,
+      "eval_samples_per_second": 4.305,
+      "eval_steps_per_second": 0.545,
+      "step": 20
+    },
+    {
+      "epoch": 2.45,
+      "grad_norm": 0.928983747959137,
+      "learning_rate": 8.999999999999999e-05,
+      "loss": 1.1718,
+      "step": 30
+    },
+    {
+      "epoch": 3.27,
+      "grad_norm": 0.253262996673584,
+      "learning_rate": 0.00011999999999999999,
+      "loss": 0.4789,
+      "step": 40
+    },
+    {
+      "epoch": 3.27,
+      "eval_loss": 0.3332095146179199,
+      "eval_runtime": 89.9804,
+      "eval_samples_per_second": 4.301,
+      "eval_steps_per_second": 0.545,
+      "step": 40
+    },
+    {
+      "epoch": 4.08,
+      "grad_norm": 0.12236642092466354,
+      "learning_rate": 0.00015,
+      "loss": 0.3568,
+      "step": 50
+    },
+    {
+      "epoch": 4.9,
+      "grad_norm": 0.09160923212766647,
+      "learning_rate": 0.00017999999999999998,
+      "loss": 0.3256,
+      "step": 60
+    },
+    {
+      "epoch": 4.9,
+      "eval_loss": 0.2753114104270935,
+      "eval_runtime": 90.3206,
+      "eval_samples_per_second": 4.285,
+      "eval_steps_per_second": 0.543,
+      "step": 60
+    },
+    {
+      "epoch": 5.71,
+      "grad_norm": 0.10242326557636261,
+      "learning_rate": 0.00020999999999999998,
+      "loss": 0.2841,
+      "step": 70
+    },
+    {
+      "epoch": 6.53,
+      "grad_norm": 0.1305350810289383,
+      "learning_rate": 0.00023999999999999998,
+      "loss": 0.2615,
+      "step": 80
+    },
+    {
+      "epoch": 6.53,
+      "eval_loss": 0.2476309835910797,
+      "eval_runtime": 90.525,
+      "eval_samples_per_second": 4.275,
+      "eval_steps_per_second": 0.541,
+      "step": 80
+    },
+    {
+      "epoch": 7.35,
+      "grad_norm": 0.17941106855869293,
+      "learning_rate": 0.00027,
+      "loss": 0.2216,
+      "step": 90
+    },
+    {
+      "epoch": 8.16,
+      "grad_norm": 0.20095375180244446,
+      "learning_rate": 0.0003,
+      "loss": 0.1832,
+      "step": 100
+    },
+    {
+      "epoch": 8.16,
+      "eval_loss": 0.2350914180278778,
+      "eval_runtime": 90.3919,
+      "eval_samples_per_second": 4.281,
+      "eval_steps_per_second": 0.542,
+      "step": 100
+    },
+    {
+      "epoch": 8.98,
+      "grad_norm": 0.2600422501564026,
+      "learning_rate": 0.00029,
+      "loss": 0.1441,
+      "step": 110
+    },
+    {
+      "epoch": 9.8,
+      "grad_norm": 0.20544037222862244,
+      "learning_rate": 0.00028,
+      "loss": 0.1186,
+      "step": 120
+    },
+    {
+      "epoch": 9.8,
+      "eval_loss": 0.23090216517448425,
+      "eval_runtime": 90.3144,
+      "eval_samples_per_second": 4.285,
+      "eval_steps_per_second": 0.543,
+      "step": 120
+    },
+    {
+      "epoch": 10.61,
+      "grad_norm": 0.2158157229423523,
+      "learning_rate": 0.00027,
+      "loss": 0.0947,
+      "step": 130
+    },
+    {
+      "epoch": 11.43,
+      "grad_norm": 0.18916285037994385,
+      "learning_rate": 0.00026,
+      "loss": 0.0768,
+      "step": 140
+    },
+    {
+      "epoch": 11.43,
+      "eval_loss": 0.24214179813861847,
+      "eval_runtime": 90.2597,
+      "eval_samples_per_second": 4.288,
+      "eval_steps_per_second": 0.543,
+      "step": 140
+    },
+    {
+      "epoch": 12.24,
+      "grad_norm": 0.22263498604297638,
+      "learning_rate": 0.00025,
+      "loss": 0.0615,
+      "step": 150
+    },
+    {
+      "epoch": 13.06,
+      "grad_norm": 0.21315976977348328,
+      "learning_rate": 0.00023999999999999998,
+      "loss": 0.054,
+      "step": 160
+    },
+    {
+      "epoch": 13.06,
+      "eval_loss": 0.25932466983795166,
+      "eval_runtime": 89.8439,
+      "eval_samples_per_second": 4.307,
+      "eval_steps_per_second": 0.545,
+      "step": 160
+    },
+    {
+      "epoch": 13.88,
+      "grad_norm": 0.18338361382484436,
+      "learning_rate": 0.00023,
+      "loss": 0.0455,
+      "step": 170
+    },
+    {
+      "epoch": 14.69,
+      "grad_norm": 0.17157459259033203,
+      "learning_rate": 0.00021999999999999995,
+      "loss": 0.0393,
+      "step": 180
+    },
+    {
+      "epoch": 14.69,
+      "eval_loss": 0.27233538031578064,
+      "eval_runtime": 90.1364,
+      "eval_samples_per_second": 4.293,
+      "eval_steps_per_second": 0.544,
+      "step": 180
+    },
+    {
+      "epoch": 15.51,
+      "grad_norm": 0.1541435867547989,
+      "learning_rate": 0.00020999999999999998,
+      "loss": 0.0352,
+      "step": 190
+    },
+    {
+      "epoch": 16.33,
+      "grad_norm": 0.1553652435541153,
+      "learning_rate": 0.00019999999999999998,
+      "loss": 0.0325,
+      "step": 200
+    },
+    {
+      "epoch": 16.33,
+      "eval_loss": 0.28704825043678284,
+      "eval_runtime": 89.7951,
+      "eval_samples_per_second": 4.31,
+      "eval_steps_per_second": 0.546,
+      "step": 200
+    },
+    {
+      "epoch": 17.14,
+      "grad_norm": 0.13403691351413727,
+      "learning_rate": 0.00018999999999999998,
+      "loss": 0.0297,
+      "step": 210
+    },
+    {
+      "epoch": 17.96,
+      "grad_norm": 0.14512716233730316,
+      "learning_rate": 0.00017999999999999998,
+      "loss": 0.0279,
+      "step": 220
+    },
+    {
+      "epoch": 17.96,
+      "eval_loss": 0.2964874505996704,
+      "eval_runtime": 89.7009,
+      "eval_samples_per_second": 4.314,
+      "eval_steps_per_second": 0.546,
+      "step": 220
+    },
+    {
+      "epoch": 18.78,
+      "grad_norm": 0.12400835007429123,
+      "learning_rate": 0.00016999999999999999,
+      "loss": 0.0263,
+      "step": 230
+    },
+    {
+      "epoch": 19.59,
+      "grad_norm": 0.1139909029006958,
+      "learning_rate": 0.00015999999999999999,
+      "loss": 0.0246,
+      "step": 240
+    },
+    {
+      "epoch": 19.59,
+      "eval_loss": 0.30519917607307434,
+      "eval_runtime": 89.8387,
+      "eval_samples_per_second": 4.308,
+      "eval_steps_per_second": 0.545,
+      "step": 240
+    },
+    {
+      "epoch": 20.41,
+      "grad_norm": 0.12317101657390594,
+      "learning_rate": 0.00015,
+      "loss": 0.0235,
+      "step": 250
+    },
+    {
+      "epoch": 21.22,
+      "grad_norm": 0.12494686245918274,
+      "learning_rate": 0.00014,
+      "loss": 0.0224,
+      "step": 260
+    },
+    {
+      "epoch": 21.22,
+      "eval_loss": 0.314134418964386,
+      "eval_runtime": 89.7974,
+      "eval_samples_per_second": 4.31,
+      "eval_steps_per_second": 0.546,
+      "step": 260
+    },
+    {
+      "epoch": 22.04,
+      "grad_norm": 0.1180659756064415,
+      "learning_rate": 0.00013,
+      "loss": 0.022,
+      "step": 270
+    },
+    {
+      "epoch": 22.86,
+      "grad_norm": 0.09653373062610626,
+      "learning_rate": 0.00011999999999999999,
+      "loss": 0.0212,
+      "step": 280
+    },
+    {
+      "epoch": 22.86,
+      "eval_loss": 0.3175604045391083,
+      "eval_runtime": 89.9764,
+      "eval_samples_per_second": 4.301,
+      "eval_steps_per_second": 0.545,
+      "step": 280
+    },
+    {
+      "epoch": 23.67,
+      "grad_norm": 0.10445748269557953,
+      "learning_rate": 0.00010999999999999998,
+      "loss": 0.0208,
+      "step": 290
+    },
+    {
+      "epoch": 24.49,
+      "grad_norm": 0.09245337545871735,
+      "learning_rate": 9.999999999999999e-05,
+      "loss": 0.0199,
+      "step": 300
+    },
+    {
+      "epoch": 24.49,
+      "eval_loss": 0.32360976934432983,
+      "eval_runtime": 89.8613,
+      "eval_samples_per_second": 4.307,
+      "eval_steps_per_second": 0.545,
+      "step": 300
+    },
+    {
+      "epoch": 25.31,
+      "grad_norm": 0.09468758851289749,
+      "learning_rate": 8.999999999999999e-05,
+      "loss": 0.0197,
+      "step": 310
+    },
+    {
+      "epoch": 26.12,
+      "grad_norm": 0.0891977846622467,
+      "learning_rate": 7.999999999999999e-05,
+      "loss": 0.0192,
+      "step": 320
+    },
+    {
+      "epoch": 26.12,
+      "eval_loss": 0.3267403841018677,
+      "eval_runtime": 90.3063,
+      "eval_samples_per_second": 4.285,
+      "eval_steps_per_second": 0.543,
+      "step": 320
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 400,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 34,
+  "save_steps": 20,
+  "total_flos": 1.8352663904460472e+18,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-320/training_args.bin b/checkpoint-320/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3f0d01d8ba12c6a725736cdf727e72ec03ea9a4f
--- /dev/null
+++ b/checkpoint-320/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:01be3c1366faeea704d7b18d02c117abdc170d0c96565a08a0f3ad9c5e7a123a
+size 4856
diff --git a/checkpoint-340/README.md b/checkpoint-340/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..4b6f787248182d62a16f6423f948c336352c3674
--- /dev/null
+++ b/checkpoint-340/README.md
@@ -0,0 +1,204 @@
+---
+library_name: peft
+base_model: bigcode/starcoder
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure 
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+
+
+### Framework versions
+
+- PEFT 0.8.2
\ No newline at end of file
diff --git a/checkpoint-340/adapter_config.json b/checkpoint-340/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..ab6c617ff1322585052700834abbc593bae7c619
--- /dev/null
+++ b/checkpoint-340/adapter_config.json
@@ -0,0 +1,28 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "bigcode/starcoder",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "c_proj",
+    "c_attn",
+    "q_attn"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-340/adapter_model.safetensors b/checkpoint-340/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75
--- /dev/null
+++ b/checkpoint-340/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41
+size 48
diff --git a/checkpoint-340/optimizer.pt b/checkpoint-340/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..574687a84444a8b3a7ad4d3aea7df81378dc3787
--- /dev/null
+++ b/checkpoint-340/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aed1d3769cbffeae18166870a276b374735aff755c2d47e63d72d0cbd1bda556
+size 284628602
diff --git a/checkpoint-340/rng_state.pth b/checkpoint-340/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..85df9fcc725ccc7574a5a906b54189070c16c85e
--- /dev/null
+++ b/checkpoint-340/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f5329c1b9ada42ddfb578fd23003ee82843e8f02a2d4dae483aeb12ea319fd09
+size 14244
diff --git a/checkpoint-340/scheduler.pt b/checkpoint-340/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ce1f7af7d3957bf85581fa17216b0d13cafadfac
--- /dev/null
+++ b/checkpoint-340/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:03376fa8bdbd1f35e302b4f6e9042b2c5f57834becd5c6d5f467088976873ddd
+size 1064
diff --git a/checkpoint-340/trainer_state.json b/checkpoint-340/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..694a17fb355951404c8b1741f45bb9b36e0b0ec1
--- /dev/null
+++ b/checkpoint-340/trainer_state.json
@@ -0,0 +1,395 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 27.755102040816325,
+  "eval_steps": 20,
+  "global_step": 340,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.82,
+      "grad_norm": 0.18377472460269928,
+      "learning_rate": 2.9999999999999997e-05,
+      "loss": 1.861,
+      "step": 10
+    },
+    {
+      "epoch": 1.63,
+      "grad_norm": 0.35202744603157043,
+      "learning_rate": 5.9999999999999995e-05,
+      "loss": 1.7263,
+      "step": 20
+    },
+    {
+      "epoch": 1.63,
+      "eval_loss": 1.4457755088806152,
+      "eval_runtime": 89.8938,
+      "eval_samples_per_second": 4.305,
+      "eval_steps_per_second": 0.545,
+      "step": 20
+    },
+    {
+      "epoch": 2.45,
+      "grad_norm": 0.928983747959137,
+      "learning_rate": 8.999999999999999e-05,
+      "loss": 1.1718,
+      "step": 30
+    },
+    {
+      "epoch": 3.27,
+      "grad_norm": 0.253262996673584,
+      "learning_rate": 0.00011999999999999999,
+      "loss": 0.4789,
+      "step": 40
+    },
+    {
+      "epoch": 3.27,
+      "eval_loss": 0.3332095146179199,
+      "eval_runtime": 89.9804,
+      "eval_samples_per_second": 4.301,
+      "eval_steps_per_second": 0.545,
+      "step": 40
+    },
+    {
+      "epoch": 4.08,
+      "grad_norm": 0.12236642092466354,
+      "learning_rate": 0.00015,
+      "loss": 0.3568,
+      "step": 50
+    },
+    {
+      "epoch": 4.9,
+      "grad_norm": 0.09160923212766647,
+      "learning_rate": 0.00017999999999999998,
+      "loss": 0.3256,
+      "step": 60
+    },
+    {
+      "epoch": 4.9,
+      "eval_loss": 0.2753114104270935,
+      "eval_runtime": 90.3206,
+      "eval_samples_per_second": 4.285,
+      "eval_steps_per_second": 0.543,
+      "step": 60
+    },
+    {
+      "epoch": 5.71,
+      "grad_norm": 0.10242326557636261,
+      "learning_rate": 0.00020999999999999998,
+      "loss": 0.2841,
+      "step": 70
+    },
+    {
+      "epoch": 6.53,
+      "grad_norm": 0.1305350810289383,
+      "learning_rate": 0.00023999999999999998,
+      "loss": 0.2615,
+      "step": 80
+    },
+    {
+      "epoch": 6.53,
+      "eval_loss": 0.2476309835910797,
+      "eval_runtime": 90.525,
+      "eval_samples_per_second": 4.275,
+      "eval_steps_per_second": 0.541,
+      "step": 80
+    },
+    {
+      "epoch": 7.35,
+      "grad_norm": 0.17941106855869293,
+      "learning_rate": 0.00027,
+      "loss": 0.2216,
+      "step": 90
+    },
+    {
+      "epoch": 8.16,
+      "grad_norm": 0.20095375180244446,
+      "learning_rate": 0.0003,
+      "loss": 0.1832,
+      "step": 100
+    },
+    {
+      "epoch": 8.16,
+      "eval_loss": 0.2350914180278778,
+      "eval_runtime": 90.3919,
+      "eval_samples_per_second": 4.281,
+      "eval_steps_per_second": 0.542,
+      "step": 100
+    },
+    {
+      "epoch": 8.98,
+      "grad_norm": 0.2600422501564026,
+      "learning_rate": 0.00029,
+      "loss": 0.1441,
+      "step": 110
+    },
+    {
+      "epoch": 9.8,
+      "grad_norm": 0.20544037222862244,
+      "learning_rate": 0.00028,
+      "loss": 0.1186,
+      "step": 120
+    },
+    {
+      "epoch": 9.8,
+      "eval_loss": 0.23090216517448425,
+      "eval_runtime": 90.3144,
+      "eval_samples_per_second": 4.285,
+      "eval_steps_per_second": 0.543,
+      "step": 120
+    },
+    {
+      "epoch": 10.61,
+      "grad_norm": 0.2158157229423523,
+      "learning_rate": 0.00027,
+      "loss": 0.0947,
+      "step": 130
+    },
+    {
+      "epoch": 11.43,
+      "grad_norm": 0.18916285037994385,
+      "learning_rate": 0.00026,
+      "loss": 0.0768,
+      "step": 140
+    },
+    {
+      "epoch": 11.43,
+      "eval_loss": 0.24214179813861847,
+      "eval_runtime": 90.2597,
+      "eval_samples_per_second": 4.288,
+      "eval_steps_per_second": 0.543,
+      "step": 140
+    },
+    {
+      "epoch": 12.24,
+      "grad_norm": 0.22263498604297638,
+      "learning_rate": 0.00025,
+      "loss": 0.0615,
+      "step": 150
+    },
+    {
+      "epoch": 13.06,
+      "grad_norm": 0.21315976977348328,
+      "learning_rate": 0.00023999999999999998,
+      "loss": 0.054,
+      "step": 160
+    },
+    {
+      "epoch": 13.06,
+      "eval_loss": 0.25932466983795166,
+      "eval_runtime": 89.8439,
+      "eval_samples_per_second": 4.307,
+      "eval_steps_per_second": 0.545,
+      "step": 160
+    },
+    {
+      "epoch": 13.88,
+      "grad_norm": 0.18338361382484436,
+      "learning_rate": 0.00023,
+      "loss": 0.0455,
+      "step": 170
+    },
+    {
+      "epoch": 14.69,
+      "grad_norm": 0.17157459259033203,
+      "learning_rate": 0.00021999999999999995,
+      "loss": 0.0393,
+      "step": 180
+    },
+    {
+      "epoch": 14.69,
+      "eval_loss": 0.27233538031578064,
+      "eval_runtime": 90.1364,
+      "eval_samples_per_second": 4.293,
+      "eval_steps_per_second": 0.544,
+      "step": 180
+    },
+    {
+      "epoch": 15.51,
+      "grad_norm": 0.1541435867547989,
+      "learning_rate": 0.00020999999999999998,
+      "loss": 0.0352,
+      "step": 190
+    },
+    {
+      "epoch": 16.33,
+      "grad_norm": 0.1553652435541153,
+      "learning_rate": 0.00019999999999999998,
+      "loss": 0.0325,
+      "step": 200
+    },
+    {
+      "epoch": 16.33,
+      "eval_loss": 0.28704825043678284,
+      "eval_runtime": 89.7951,
+      "eval_samples_per_second": 4.31,
+      "eval_steps_per_second": 0.546,
+      "step": 200
+    },
+    {
+      "epoch": 17.14,
+      "grad_norm": 0.13403691351413727,
+      "learning_rate": 0.00018999999999999998,
+      "loss": 0.0297,
+      "step": 210
+    },
+    {
+      "epoch": 17.96,
+      "grad_norm": 0.14512716233730316,
+      "learning_rate": 0.00017999999999999998,
+      "loss": 0.0279,
+      "step": 220
+    },
+    {
+      "epoch": 17.96,
+      "eval_loss": 0.2964874505996704,
+      "eval_runtime": 89.7009,
+      "eval_samples_per_second": 4.314,
+      "eval_steps_per_second": 0.546,
+      "step": 220
+    },
+    {
+      "epoch": 18.78,
+      "grad_norm": 0.12400835007429123,
+      "learning_rate": 0.00016999999999999999,
+      "loss": 0.0263,
+      "step": 230
+    },
+    {
+      "epoch": 19.59,
+      "grad_norm": 0.1139909029006958,
+      "learning_rate": 0.00015999999999999999,
+      "loss": 0.0246,
+      "step": 240
+    },
+    {
+      "epoch": 19.59,
+      "eval_loss": 0.30519917607307434,
+      "eval_runtime": 89.8387,
+      "eval_samples_per_second": 4.308,
+      "eval_steps_per_second": 0.545,
+      "step": 240
+    },
+    {
+      "epoch": 20.41,
+      "grad_norm": 0.12317101657390594,
+      "learning_rate": 0.00015,
+      "loss": 0.0235,
+      "step": 250
+    },
+    {
+      "epoch": 21.22,
+      "grad_norm": 0.12494686245918274,
+      "learning_rate": 0.00014,
+      "loss": 0.0224,
+      "step": 260
+    },
+    {
+      "epoch": 21.22,
+      "eval_loss": 0.314134418964386,
+      "eval_runtime": 89.7974,
+      "eval_samples_per_second": 4.31,
+      "eval_steps_per_second": 0.546,
+      "step": 260
+    },
+    {
+      "epoch": 22.04,
+      "grad_norm": 0.1180659756064415,
+      "learning_rate": 0.00013,
+      "loss": 0.022,
+      "step": 270
+    },
+    {
+      "epoch": 22.86,
+      "grad_norm": 0.09653373062610626,
+      "learning_rate": 0.00011999999999999999,
+      "loss": 0.0212,
+      "step": 280
+    },
+    {
+      "epoch": 22.86,
+      "eval_loss": 0.3175604045391083,
+      "eval_runtime": 89.9764,
+      "eval_samples_per_second": 4.301,
+      "eval_steps_per_second": 0.545,
+      "step": 280
+    },
+    {
+      "epoch": 23.67,
+      "grad_norm": 0.10445748269557953,
+      "learning_rate": 0.00010999999999999998,
+      "loss": 0.0208,
+      "step": 290
+    },
+    {
+      "epoch": 24.49,
+      "grad_norm": 0.09245337545871735,
+      "learning_rate": 9.999999999999999e-05,
+      "loss": 0.0199,
+      "step": 300
+    },
+    {
+      "epoch": 24.49,
+      "eval_loss": 0.32360976934432983,
+      "eval_runtime": 89.8613,
+      "eval_samples_per_second": 4.307,
+      "eval_steps_per_second": 0.545,
+      "step": 300
+    },
+    {
+      "epoch": 25.31,
+      "grad_norm": 0.09468758851289749,
+      "learning_rate": 8.999999999999999e-05,
+      "loss": 0.0197,
+      "step": 310
+    },
+    {
+      "epoch": 26.12,
+      "grad_norm": 0.0891977846622467,
+      "learning_rate": 7.999999999999999e-05,
+      "loss": 0.0192,
+      "step": 320
+    },
+    {
+      "epoch": 26.12,
+      "eval_loss": 0.3267403841018677,
+      "eval_runtime": 90.3063,
+      "eval_samples_per_second": 4.285,
+      "eval_steps_per_second": 0.543,
+      "step": 320
+    },
+    {
+      "epoch": 26.94,
+      "grad_norm": 0.08574336767196655,
+      "learning_rate": 7e-05,
+      "loss": 0.0188,
+      "step": 330
+    },
+    {
+      "epoch": 27.76,
+      "grad_norm": 0.08517367392778397,
+      "learning_rate": 5.9999999999999995e-05,
+      "loss": 0.0184,
+      "step": 340
+    },
+    {
+      "epoch": 27.76,
+      "eval_loss": 0.33063870668411255,
+      "eval_runtime": 89.8041,
+      "eval_samples_per_second": 4.309,
+      "eval_steps_per_second": 0.546,
+      "step": 340
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 400,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 34,
+  "save_steps": 20,
+  "total_flos": 1.9503266007676355e+18,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-340/training_args.bin b/checkpoint-340/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3f0d01d8ba12c6a725736cdf727e72ec03ea9a4f
--- /dev/null
+++ b/checkpoint-340/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:01be3c1366faeea704d7b18d02c117abdc170d0c96565a08a0f3ad9c5e7a123a
+size 4856
diff --git a/checkpoint-360/README.md b/checkpoint-360/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..4b6f787248182d62a16f6423f948c336352c3674
--- /dev/null
+++ b/checkpoint-360/README.md
@@ -0,0 +1,204 @@
+---
+library_name: peft
+base_model: bigcode/starcoder
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure 
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+
+
+### Framework versions
+
+- PEFT 0.8.2
\ No newline at end of file
diff --git a/checkpoint-360/adapter_config.json b/checkpoint-360/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..ab6c617ff1322585052700834abbc593bae7c619
--- /dev/null
+++ b/checkpoint-360/adapter_config.json
@@ -0,0 +1,28 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "bigcode/starcoder",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "c_proj",
+    "c_attn",
+    "q_attn"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-360/adapter_model.safetensors b/checkpoint-360/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75
--- /dev/null
+++ b/checkpoint-360/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41
+size 48
diff --git a/checkpoint-360/optimizer.pt b/checkpoint-360/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d1a2caa0fe16ebd0a2aab6d4d1868e2312fc8c2a
--- /dev/null
+++ b/checkpoint-360/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:141865bc10df9b78a6f453093595dc7b4a83d54018b3fe83379cee566526a843
+size 284628602
diff --git a/checkpoint-360/rng_state.pth b/checkpoint-360/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..7550b719f16999791822e5898428f5c2d1117d88
--- /dev/null
+++ b/checkpoint-360/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c8e908dccb85d3a690d3e5541946de9a9e4d091ab14c7167380ea12530d079df
+size 14244
diff --git a/checkpoint-360/scheduler.pt b/checkpoint-360/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6f81351e650ea7c1de881065d6d8cb01c4bf1781
--- /dev/null
+++ b/checkpoint-360/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:35452c1466d786926545eee1f0187be30d79bc322f32d2395b7460c9b050bbbe
+size 1064
diff --git a/checkpoint-360/trainer_state.json b/checkpoint-360/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..9d01019879c208198e948c370fcc54315150a876
--- /dev/null
+++ b/checkpoint-360/trainer_state.json
@@ -0,0 +1,417 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 29.387755102040817,
+  "eval_steps": 20,
+  "global_step": 360,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.82,
+      "grad_norm": 0.18377472460269928,
+      "learning_rate": 2.9999999999999997e-05,
+      "loss": 1.861,
+      "step": 10
+    },
+    {
+      "epoch": 1.63,
+      "grad_norm": 0.35202744603157043,
+      "learning_rate": 5.9999999999999995e-05,
+      "loss": 1.7263,
+      "step": 20
+    },
+    {
+      "epoch": 1.63,
+      "eval_loss": 1.4457755088806152,
+      "eval_runtime": 89.8938,
+      "eval_samples_per_second": 4.305,
+      "eval_steps_per_second": 0.545,
+      "step": 20
+    },
+    {
+      "epoch": 2.45,
+      "grad_norm": 0.928983747959137,
+      "learning_rate": 8.999999999999999e-05,
+      "loss": 1.1718,
+      "step": 30
+    },
+    {
+      "epoch": 3.27,
+      "grad_norm": 0.253262996673584,
+      "learning_rate": 0.00011999999999999999,
+      "loss": 0.4789,
+      "step": 40
+    },
+    {
+      "epoch": 3.27,
+      "eval_loss": 0.3332095146179199,
+      "eval_runtime": 89.9804,
+      "eval_samples_per_second": 4.301,
+      "eval_steps_per_second": 0.545,
+      "step": 40
+    },
+    {
+      "epoch": 4.08,
+      "grad_norm": 0.12236642092466354,
+      "learning_rate": 0.00015,
+      "loss": 0.3568,
+      "step": 50
+    },
+    {
+      "epoch": 4.9,
+      "grad_norm": 0.09160923212766647,
+      "learning_rate": 0.00017999999999999998,
+      "loss": 0.3256,
+      "step": 60
+    },
+    {
+      "epoch": 4.9,
+      "eval_loss": 0.2753114104270935,
+      "eval_runtime": 90.3206,
+      "eval_samples_per_second": 4.285,
+      "eval_steps_per_second": 0.543,
+      "step": 60
+    },
+    {
+      "epoch": 5.71,
+      "grad_norm": 0.10242326557636261,
+      "learning_rate": 0.00020999999999999998,
+      "loss": 0.2841,
+      "step": 70
+    },
+    {
+      "epoch": 6.53,
+      "grad_norm": 0.1305350810289383,
+      "learning_rate": 0.00023999999999999998,
+      "loss": 0.2615,
+      "step": 80
+    },
+    {
+      "epoch": 6.53,
+      "eval_loss": 0.2476309835910797,
+      "eval_runtime": 90.525,
+      "eval_samples_per_second": 4.275,
+      "eval_steps_per_second": 0.541,
+      "step": 80
+    },
+    {
+      "epoch": 7.35,
+      "grad_norm": 0.17941106855869293,
+      "learning_rate": 0.00027,
+      "loss": 0.2216,
+      "step": 90
+    },
+    {
+      "epoch": 8.16,
+      "grad_norm": 0.20095375180244446,
+      "learning_rate": 0.0003,
+      "loss": 0.1832,
+      "step": 100
+    },
+    {
+      "epoch": 8.16,
+      "eval_loss": 0.2350914180278778,
+      "eval_runtime": 90.3919,
+      "eval_samples_per_second": 4.281,
+      "eval_steps_per_second": 0.542,
+      "step": 100
+    },
+    {
+      "epoch": 8.98,
+      "grad_norm": 0.2600422501564026,
+      "learning_rate": 0.00029,
+      "loss": 0.1441,
+      "step": 110
+    },
+    {
+      "epoch": 9.8,
+      "grad_norm": 0.20544037222862244,
+      "learning_rate": 0.00028,
+      "loss": 0.1186,
+      "step": 120
+    },
+    {
+      "epoch": 9.8,
+      "eval_loss": 0.23090216517448425,
+      "eval_runtime": 90.3144,
+      "eval_samples_per_second": 4.285,
+      "eval_steps_per_second": 0.543,
+      "step": 120
+    },
+    {
+      "epoch": 10.61,
+      "grad_norm": 0.2158157229423523,
+      "learning_rate": 0.00027,
+      "loss": 0.0947,
+      "step": 130
+    },
+    {
+      "epoch": 11.43,
+      "grad_norm": 0.18916285037994385,
+      "learning_rate": 0.00026,
+      "loss": 0.0768,
+      "step": 140
+    },
+    {
+      "epoch": 11.43,
+      "eval_loss": 0.24214179813861847,
+      "eval_runtime": 90.2597,
+      "eval_samples_per_second": 4.288,
+      "eval_steps_per_second": 0.543,
+      "step": 140
+    },
+    {
+      "epoch": 12.24,
+      "grad_norm": 0.22263498604297638,
+      "learning_rate": 0.00025,
+      "loss": 0.0615,
+      "step": 150
+    },
+    {
+      "epoch": 13.06,
+      "grad_norm": 0.21315976977348328,
+      "learning_rate": 0.00023999999999999998,
+      "loss": 0.054,
+      "step": 160
+    },
+    {
+      "epoch": 13.06,
+      "eval_loss": 0.25932466983795166,
+      "eval_runtime": 89.8439,
+      "eval_samples_per_second": 4.307,
+      "eval_steps_per_second": 0.545,
+      "step": 160
+    },
+    {
+      "epoch": 13.88,
+      "grad_norm": 0.18338361382484436,
+      "learning_rate": 0.00023,
+      "loss": 0.0455,
+      "step": 170
+    },
+    {
+      "epoch": 14.69,
+      "grad_norm": 0.17157459259033203,
+      "learning_rate": 0.00021999999999999995,
+      "loss": 0.0393,
+      "step": 180
+    },
+    {
+      "epoch": 14.69,
+      "eval_loss": 0.27233538031578064,
+      "eval_runtime": 90.1364,
+      "eval_samples_per_second": 4.293,
+      "eval_steps_per_second": 0.544,
+      "step": 180
+    },
+    {
+      "epoch": 15.51,
+      "grad_norm": 0.1541435867547989,
+      "learning_rate": 0.00020999999999999998,
+      "loss": 0.0352,
+      "step": 190
+    },
+    {
+      "epoch": 16.33,
+      "grad_norm": 0.1553652435541153,
+      "learning_rate": 0.00019999999999999998,
+      "loss": 0.0325,
+      "step": 200
+    },
+    {
+      "epoch": 16.33,
+      "eval_loss": 0.28704825043678284,
+      "eval_runtime": 89.7951,
+      "eval_samples_per_second": 4.31,
+      "eval_steps_per_second": 0.546,
+      "step": 200
+    },
+    {
+      "epoch": 17.14,
+      "grad_norm": 0.13403691351413727,
+      "learning_rate": 0.00018999999999999998,
+      "loss": 0.0297,
+      "step": 210
+    },
+    {
+      "epoch": 17.96,
+      "grad_norm": 0.14512716233730316,
+      "learning_rate": 0.00017999999999999998,
+      "loss": 0.0279,
+      "step": 220
+    },
+    {
+      "epoch": 17.96,
+      "eval_loss": 0.2964874505996704,
+      "eval_runtime": 89.7009,
+      "eval_samples_per_second": 4.314,
+      "eval_steps_per_second": 0.546,
+      "step": 220
+    },
+    {
+      "epoch": 18.78,
+      "grad_norm": 0.12400835007429123,
+      "learning_rate": 0.00016999999999999999,
+      "loss": 0.0263,
+      "step": 230
+    },
+    {
+      "epoch": 19.59,
+      "grad_norm": 0.1139909029006958,
+      "learning_rate": 0.00015999999999999999,
+      "loss": 0.0246,
+      "step": 240
+    },
+    {
+      "epoch": 19.59,
+      "eval_loss": 0.30519917607307434,
+      "eval_runtime": 89.8387,
+      "eval_samples_per_second": 4.308,
+      "eval_steps_per_second": 0.545,
+      "step": 240
+    },
+    {
+      "epoch": 20.41,
+      "grad_norm": 0.12317101657390594,
+      "learning_rate": 0.00015,
+      "loss": 0.0235,
+      "step": 250
+    },
+    {
+      "epoch": 21.22,
+      "grad_norm": 0.12494686245918274,
+      "learning_rate": 0.00014,
+      "loss": 0.0224,
+      "step": 260
+    },
+    {
+      "epoch": 21.22,
+      "eval_loss": 0.314134418964386,
+      "eval_runtime": 89.7974,
+      "eval_samples_per_second": 4.31,
+      "eval_steps_per_second": 0.546,
+      "step": 260
+    },
+    {
+      "epoch": 22.04,
+      "grad_norm": 0.1180659756064415,
+      "learning_rate": 0.00013,
+      "loss": 0.022,
+      "step": 270
+    },
+    {
+      "epoch": 22.86,
+      "grad_norm": 0.09653373062610626,
+      "learning_rate": 0.00011999999999999999,
+      "loss": 0.0212,
+      "step": 280
+    },
+    {
+      "epoch": 22.86,
+      "eval_loss": 0.3175604045391083,
+      "eval_runtime": 89.9764,
+      "eval_samples_per_second": 4.301,
+      "eval_steps_per_second": 0.545,
+      "step": 280
+    },
+    {
+      "epoch": 23.67,
+      "grad_norm": 0.10445748269557953,
+      "learning_rate": 0.00010999999999999998,
+      "loss": 0.0208,
+      "step": 290
+    },
+    {
+      "epoch": 24.49,
+      "grad_norm": 0.09245337545871735,
+      "learning_rate": 9.999999999999999e-05,
+      "loss": 0.0199,
+      "step": 300
+    },
+    {
+      "epoch": 24.49,
+      "eval_loss": 0.32360976934432983,
+      "eval_runtime": 89.8613,
+      "eval_samples_per_second": 4.307,
+      "eval_steps_per_second": 0.545,
+      "step": 300
+    },
+    {
+      "epoch": 25.31,
+      "grad_norm": 0.09468758851289749,
+      "learning_rate": 8.999999999999999e-05,
+      "loss": 0.0197,
+      "step": 310
+    },
+    {
+      "epoch": 26.12,
+      "grad_norm": 0.0891977846622467,
+      "learning_rate": 7.999999999999999e-05,
+      "loss": 0.0192,
+      "step": 320
+    },
+    {
+      "epoch": 26.12,
+      "eval_loss": 0.3267403841018677,
+      "eval_runtime": 90.3063,
+      "eval_samples_per_second": 4.285,
+      "eval_steps_per_second": 0.543,
+      "step": 320
+    },
+    {
+      "epoch": 26.94,
+      "grad_norm": 0.08574336767196655,
+      "learning_rate": 7e-05,
+      "loss": 0.0188,
+      "step": 330
+    },
+    {
+      "epoch": 27.76,
+      "grad_norm": 0.08517367392778397,
+      "learning_rate": 5.9999999999999995e-05,
+      "loss": 0.0184,
+      "step": 340
+    },
+    {
+      "epoch": 27.76,
+      "eval_loss": 0.33063870668411255,
+      "eval_runtime": 89.8041,
+      "eval_samples_per_second": 4.309,
+      "eval_steps_per_second": 0.546,
+      "step": 340
+    },
+    {
+      "epoch": 28.57,
+      "grad_norm": 0.08357132971286774,
+      "learning_rate": 4.9999999999999996e-05,
+      "loss": 0.0181,
+      "step": 350
+    },
+    {
+      "epoch": 29.39,
+      "grad_norm": 0.08679915964603424,
+      "learning_rate": 3.9999999999999996e-05,
+      "loss": 0.0181,
+      "step": 360
+    },
+    {
+      "epoch": 29.39,
+      "eval_loss": 0.3352932929992676,
+      "eval_runtime": 90.2438,
+      "eval_samples_per_second": 4.288,
+      "eval_steps_per_second": 0.543,
+      "step": 360
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 400,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 34,
+  "save_steps": 20,
+  "total_flos": 2.0648731494360023e+18,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-360/training_args.bin b/checkpoint-360/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3f0d01d8ba12c6a725736cdf727e72ec03ea9a4f
--- /dev/null
+++ b/checkpoint-360/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:01be3c1366faeea704d7b18d02c117abdc170d0c96565a08a0f3ad9c5e7a123a
+size 4856
diff --git a/checkpoint-380/README.md b/checkpoint-380/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..4b6f787248182d62a16f6423f948c336352c3674
--- /dev/null
+++ b/checkpoint-380/README.md
@@ -0,0 +1,204 @@
+---
+library_name: peft
+base_model: bigcode/starcoder
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure 
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+
+
+### Framework versions
+
+- PEFT 0.8.2
\ No newline at end of file
diff --git a/checkpoint-380/adapter_config.json b/checkpoint-380/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..ab6c617ff1322585052700834abbc593bae7c619
--- /dev/null
+++ b/checkpoint-380/adapter_config.json
@@ -0,0 +1,28 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "bigcode/starcoder",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "c_proj",
+    "c_attn",
+    "q_attn"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-380/adapter_model.safetensors b/checkpoint-380/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75
--- /dev/null
+++ b/checkpoint-380/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41
+size 48
diff --git a/checkpoint-380/optimizer.pt b/checkpoint-380/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9e7acb641af9b3bd1b1d0e81ac89d6ddbef978d0
--- /dev/null
+++ b/checkpoint-380/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8c2a34ca054a8a94f5560be130b0c2008ecadaf3fb3e97fa51ad5cc1446db376
+size 284628602
diff --git a/checkpoint-380/rng_state.pth b/checkpoint-380/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..9a675a736c9210266b321d054f7bb24abe150068
--- /dev/null
+++ b/checkpoint-380/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c53763ba648d8eaed325a2de2f963eabbbd42e2b977ef0fc8e3b8b949cff0a5d
+size 14244
diff --git a/checkpoint-380/scheduler.pt b/checkpoint-380/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a0e2655e310c92954c1a1e203e6229349da342e9
--- /dev/null
+++ b/checkpoint-380/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8a39e2df26b3f3572b475720558ff8a2e0c169132c8cb9a6d4c54b36340352b7
+size 1064
diff --git a/checkpoint-380/trainer_state.json b/checkpoint-380/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..2391dbfe3ce6f9878305bf3471f1ee505f7dd37e
--- /dev/null
+++ b/checkpoint-380/trainer_state.json
@@ -0,0 +1,439 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 31.020408163265305,
+  "eval_steps": 20,
+  "global_step": 380,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.82,
+      "grad_norm": 0.18377472460269928,
+      "learning_rate": 2.9999999999999997e-05,
+      "loss": 1.861,
+      "step": 10
+    },
+    {
+      "epoch": 1.63,
+      "grad_norm": 0.35202744603157043,
+      "learning_rate": 5.9999999999999995e-05,
+      "loss": 1.7263,
+      "step": 20
+    },
+    {
+      "epoch": 1.63,
+      "eval_loss": 1.4457755088806152,
+      "eval_runtime": 89.8938,
+      "eval_samples_per_second": 4.305,
+      "eval_steps_per_second": 0.545,
+      "step": 20
+    },
+    {
+      "epoch": 2.45,
+      "grad_norm": 0.928983747959137,
+      "learning_rate": 8.999999999999999e-05,
+      "loss": 1.1718,
+      "step": 30
+    },
+    {
+      "epoch": 3.27,
+      "grad_norm": 0.253262996673584,
+      "learning_rate": 0.00011999999999999999,
+      "loss": 0.4789,
+      "step": 40
+    },
+    {
+      "epoch": 3.27,
+      "eval_loss": 0.3332095146179199,
+      "eval_runtime": 89.9804,
+      "eval_samples_per_second": 4.301,
+      "eval_steps_per_second": 0.545,
+      "step": 40
+    },
+    {
+      "epoch": 4.08,
+      "grad_norm": 0.12236642092466354,
+      "learning_rate": 0.00015,
+      "loss": 0.3568,
+      "step": 50
+    },
+    {
+      "epoch": 4.9,
+      "grad_norm": 0.09160923212766647,
+      "learning_rate": 0.00017999999999999998,
+      "loss": 0.3256,
+      "step": 60
+    },
+    {
+      "epoch": 4.9,
+      "eval_loss": 0.2753114104270935,
+      "eval_runtime": 90.3206,
+      "eval_samples_per_second": 4.285,
+      "eval_steps_per_second": 0.543,
+      "step": 60
+    },
+    {
+      "epoch": 5.71,
+      "grad_norm": 0.10242326557636261,
+      "learning_rate": 0.00020999999999999998,
+      "loss": 0.2841,
+      "step": 70
+    },
+    {
+      "epoch": 6.53,
+      "grad_norm": 0.1305350810289383,
+      "learning_rate": 0.00023999999999999998,
+      "loss": 0.2615,
+      "step": 80
+    },
+    {
+      "epoch": 6.53,
+      "eval_loss": 0.2476309835910797,
+      "eval_runtime": 90.525,
+      "eval_samples_per_second": 4.275,
+      "eval_steps_per_second": 0.541,
+      "step": 80
+    },
+    {
+      "epoch": 7.35,
+      "grad_norm": 0.17941106855869293,
+      "learning_rate": 0.00027,
+      "loss": 0.2216,
+      "step": 90
+    },
+    {
+      "epoch": 8.16,
+      "grad_norm": 0.20095375180244446,
+      "learning_rate": 0.0003,
+      "loss": 0.1832,
+      "step": 100
+    },
+    {
+      "epoch": 8.16,
+      "eval_loss": 0.2350914180278778,
+      "eval_runtime": 90.3919,
+      "eval_samples_per_second": 4.281,
+      "eval_steps_per_second": 0.542,
+      "step": 100
+    },
+    {
+      "epoch": 8.98,
+      "grad_norm": 0.2600422501564026,
+      "learning_rate": 0.00029,
+      "loss": 0.1441,
+      "step": 110
+    },
+    {
+      "epoch": 9.8,
+      "grad_norm": 0.20544037222862244,
+      "learning_rate": 0.00028,
+      "loss": 0.1186,
+      "step": 120
+    },
+    {
+      "epoch": 9.8,
+      "eval_loss": 0.23090216517448425,
+      "eval_runtime": 90.3144,
+      "eval_samples_per_second": 4.285,
+      "eval_steps_per_second": 0.543,
+      "step": 120
+    },
+    {
+      "epoch": 10.61,
+      "grad_norm": 0.2158157229423523,
+      "learning_rate": 0.00027,
+      "loss": 0.0947,
+      "step": 130
+    },
+    {
+      "epoch": 11.43,
+      "grad_norm": 0.18916285037994385,
+      "learning_rate": 0.00026,
+      "loss": 0.0768,
+      "step": 140
+    },
+    {
+      "epoch": 11.43,
+      "eval_loss": 0.24214179813861847,
+      "eval_runtime": 90.2597,
+      "eval_samples_per_second": 4.288,
+      "eval_steps_per_second": 0.543,
+      "step": 140
+    },
+    {
+      "epoch": 12.24,
+      "grad_norm": 0.22263498604297638,
+      "learning_rate": 0.00025,
+      "loss": 0.0615,
+      "step": 150
+    },
+    {
+      "epoch": 13.06,
+      "grad_norm": 0.21315976977348328,
+      "learning_rate": 0.00023999999999999998,
+      "loss": 0.054,
+      "step": 160
+    },
+    {
+      "epoch": 13.06,
+      "eval_loss": 0.25932466983795166,
+      "eval_runtime": 89.8439,
+      "eval_samples_per_second": 4.307,
+      "eval_steps_per_second": 0.545,
+      "step": 160
+    },
+    {
+      "epoch": 13.88,
+      "grad_norm": 0.18338361382484436,
+      "learning_rate": 0.00023,
+      "loss": 0.0455,
+      "step": 170
+    },
+    {
+      "epoch": 14.69,
+      "grad_norm": 0.17157459259033203,
+      "learning_rate": 0.00021999999999999995,
+      "loss": 0.0393,
+      "step": 180
+    },
+    {
+      "epoch": 14.69,
+      "eval_loss": 0.27233538031578064,
+      "eval_runtime": 90.1364,
+      "eval_samples_per_second": 4.293,
+      "eval_steps_per_second": 0.544,
+      "step": 180
+    },
+    {
+      "epoch": 15.51,
+      "grad_norm": 0.1541435867547989,
+      "learning_rate": 0.00020999999999999998,
+      "loss": 0.0352,
+      "step": 190
+    },
+    {
+      "epoch": 16.33,
+      "grad_norm": 0.1553652435541153,
+      "learning_rate": 0.00019999999999999998,
+      "loss": 0.0325,
+      "step": 200
+    },
+    {
+      "epoch": 16.33,
+      "eval_loss": 0.28704825043678284,
+      "eval_runtime": 89.7951,
+      "eval_samples_per_second": 4.31,
+      "eval_steps_per_second": 0.546,
+      "step": 200
+    },
+    {
+      "epoch": 17.14,
+      "grad_norm": 0.13403691351413727,
+      "learning_rate": 0.00018999999999999998,
+      "loss": 0.0297,
+      "step": 210
+    },
+    {
+      "epoch": 17.96,
+      "grad_norm": 0.14512716233730316,
+      "learning_rate": 0.00017999999999999998,
+      "loss": 0.0279,
+      "step": 220
+    },
+    {
+      "epoch": 17.96,
+      "eval_loss": 0.2964874505996704,
+      "eval_runtime": 89.7009,
+      "eval_samples_per_second": 4.314,
+      "eval_steps_per_second": 0.546,
+      "step": 220
+    },
+    {
+      "epoch": 18.78,
+      "grad_norm": 0.12400835007429123,
+      "learning_rate": 0.00016999999999999999,
+      "loss": 0.0263,
+      "step": 230
+    },
+    {
+      "epoch": 19.59,
+      "grad_norm": 0.1139909029006958,
+      "learning_rate": 0.00015999999999999999,
+      "loss": 0.0246,
+      "step": 240
+    },
+    {
+      "epoch": 19.59,
+      "eval_loss": 0.30519917607307434,
+      "eval_runtime": 89.8387,
+      "eval_samples_per_second": 4.308,
+      "eval_steps_per_second": 0.545,
+      "step": 240
+    },
+    {
+      "epoch": 20.41,
+      "grad_norm": 0.12317101657390594,
+      "learning_rate": 0.00015,
+      "loss": 0.0235,
+      "step": 250
+    },
+    {
+      "epoch": 21.22,
+      "grad_norm": 0.12494686245918274,
+      "learning_rate": 0.00014,
+      "loss": 0.0224,
+      "step": 260
+    },
+    {
+      "epoch": 21.22,
+      "eval_loss": 0.314134418964386,
+      "eval_runtime": 89.7974,
+      "eval_samples_per_second": 4.31,
+      "eval_steps_per_second": 0.546,
+      "step": 260
+    },
+    {
+      "epoch": 22.04,
+      "grad_norm": 0.1180659756064415,
+      "learning_rate": 0.00013,
+      "loss": 0.022,
+      "step": 270
+    },
+    {
+      "epoch": 22.86,
+      "grad_norm": 0.09653373062610626,
+      "learning_rate": 0.00011999999999999999,
+      "loss": 0.0212,
+      "step": 280
+    },
+    {
+      "epoch": 22.86,
+      "eval_loss": 0.3175604045391083,
+      "eval_runtime": 89.9764,
+      "eval_samples_per_second": 4.301,
+      "eval_steps_per_second": 0.545,
+      "step": 280
+    },
+    {
+      "epoch": 23.67,
+      "grad_norm": 0.10445748269557953,
+      "learning_rate": 0.00010999999999999998,
+      "loss": 0.0208,
+      "step": 290
+    },
+    {
+      "epoch": 24.49,
+      "grad_norm": 0.09245337545871735,
+      "learning_rate": 9.999999999999999e-05,
+      "loss": 0.0199,
+      "step": 300
+    },
+    {
+      "epoch": 24.49,
+      "eval_loss": 0.32360976934432983,
+      "eval_runtime": 89.8613,
+      "eval_samples_per_second": 4.307,
+      "eval_steps_per_second": 0.545,
+      "step": 300
+    },
+    {
+      "epoch": 25.31,
+      "grad_norm": 0.09468758851289749,
+      "learning_rate": 8.999999999999999e-05,
+      "loss": 0.0197,
+      "step": 310
+    },
+    {
+      "epoch": 26.12,
+      "grad_norm": 0.0891977846622467,
+      "learning_rate": 7.999999999999999e-05,
+      "loss": 0.0192,
+      "step": 320
+    },
+    {
+      "epoch": 26.12,
+      "eval_loss": 0.3267403841018677,
+      "eval_runtime": 90.3063,
+      "eval_samples_per_second": 4.285,
+      "eval_steps_per_second": 0.543,
+      "step": 320
+    },
+    {
+      "epoch": 26.94,
+      "grad_norm": 0.08574336767196655,
+      "learning_rate": 7e-05,
+      "loss": 0.0188,
+      "step": 330
+    },
+    {
+      "epoch": 27.76,
+      "grad_norm": 0.08517367392778397,
+      "learning_rate": 5.9999999999999995e-05,
+      "loss": 0.0184,
+      "step": 340
+    },
+    {
+      "epoch": 27.76,
+      "eval_loss": 0.33063870668411255,
+      "eval_runtime": 89.8041,
+      "eval_samples_per_second": 4.309,
+      "eval_steps_per_second": 0.546,
+      "step": 340
+    },
+    {
+      "epoch": 28.57,
+      "grad_norm": 0.08357132971286774,
+      "learning_rate": 4.9999999999999996e-05,
+      "loss": 0.0181,
+      "step": 350
+    },
+    {
+      "epoch": 29.39,
+      "grad_norm": 0.08679915964603424,
+      "learning_rate": 3.9999999999999996e-05,
+      "loss": 0.0181,
+      "step": 360
+    },
+    {
+      "epoch": 29.39,
+      "eval_loss": 0.3352932929992676,
+      "eval_runtime": 90.2438,
+      "eval_samples_per_second": 4.288,
+      "eval_steps_per_second": 0.543,
+      "step": 360
+    },
+    {
+      "epoch": 30.2,
+      "grad_norm": 0.07208231836557388,
+      "learning_rate": 2.9999999999999997e-05,
+      "loss": 0.0178,
+      "step": 370
+    },
+    {
+      "epoch": 31.02,
+      "grad_norm": 0.08611435443162918,
+      "learning_rate": 1.9999999999999998e-05,
+      "loss": 0.0175,
+      "step": 380
+    },
+    {
+      "epoch": 31.02,
+      "eval_loss": 0.338119238615036,
+      "eval_runtime": 90.2767,
+      "eval_samples_per_second": 4.287,
+      "eval_steps_per_second": 0.543,
+      "step": 380
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 400,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 34,
+  "save_steps": 20,
+  "total_flos": 2.1789060364511478e+18,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-380/training_args.bin b/checkpoint-380/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3f0d01d8ba12c6a725736cdf727e72ec03ea9a4f
--- /dev/null
+++ b/checkpoint-380/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:01be3c1366faeea704d7b18d02c117abdc170d0c96565a08a0f3ad9c5e7a123a
+size 4856
diff --git a/checkpoint-40/README.md b/checkpoint-40/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..4b6f787248182d62a16f6423f948c336352c3674
--- /dev/null
+++ b/checkpoint-40/README.md
@@ -0,0 +1,204 @@
+---
+library_name: peft
+base_model: bigcode/starcoder
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure 
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+
+
+### Framework versions
+
+- PEFT 0.8.2
\ No newline at end of file
diff --git a/checkpoint-40/adapter_config.json b/checkpoint-40/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..ab6c617ff1322585052700834abbc593bae7c619
--- /dev/null
+++ b/checkpoint-40/adapter_config.json
@@ -0,0 +1,28 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "bigcode/starcoder",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "c_proj",
+    "c_attn",
+    "q_attn"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-40/adapter_model.safetensors b/checkpoint-40/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75
--- /dev/null
+++ b/checkpoint-40/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41
+size 48
diff --git a/checkpoint-40/optimizer.pt b/checkpoint-40/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e164c05d60c992a05ac41272e225d4fc0f5b3138
--- /dev/null
+++ b/checkpoint-40/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8725ae58cb6274766242de451fee2b4e9735b5babfc1f0a0978d1e4d1e0d13b7
+size 284628602
diff --git a/checkpoint-40/rng_state.pth b/checkpoint-40/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..9d68344df651a0ac3b45d1a20a36acb7f27f114b
--- /dev/null
+++ b/checkpoint-40/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c9b8cf7272d40bde7127d6862dd27f666bac39b576335a5119bee0adfd33cad2
+size 14244
diff --git a/checkpoint-40/scheduler.pt b/checkpoint-40/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3d1c414264d7a61f0eab2a577c1b2d29d2015e3c
--- /dev/null
+++ b/checkpoint-40/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c2c8a6a21ca956c9d84f17aca6da97a5c2b2ba1ba5d1e4f431e18bc47f94ca32
+size 1064
diff --git a/checkpoint-40/trainer_state.json b/checkpoint-40/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..ad06a8838adbdd063f90438741c1c88b7a11b6f0
--- /dev/null
+++ b/checkpoint-40/trainer_state.json
@@ -0,0 +1,65 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.2653061224489797,
+  "eval_steps": 20,
+  "global_step": 40,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.82,
+      "grad_norm": 0.18377472460269928,
+      "learning_rate": 2.9999999999999997e-05,
+      "loss": 1.861,
+      "step": 10
+    },
+    {
+      "epoch": 1.63,
+      "grad_norm": 0.35202744603157043,
+      "learning_rate": 5.9999999999999995e-05,
+      "loss": 1.7263,
+      "step": 20
+    },
+    {
+      "epoch": 1.63,
+      "eval_loss": 1.4457755088806152,
+      "eval_runtime": 89.8938,
+      "eval_samples_per_second": 4.305,
+      "eval_steps_per_second": 0.545,
+      "step": 20
+    },
+    {
+      "epoch": 2.45,
+      "grad_norm": 0.928983747959137,
+      "learning_rate": 8.999999999999999e-05,
+      "loss": 1.1718,
+      "step": 30
+    },
+    {
+      "epoch": 3.27,
+      "grad_norm": 0.253262996673584,
+      "learning_rate": 0.00011999999999999999,
+      "loss": 0.4789,
+      "step": 40
+    },
+    {
+      "epoch": 3.27,
+      "eval_loss": 0.3332095146179199,
+      "eval_runtime": 89.9804,
+      "eval_samples_per_second": 4.301,
+      "eval_steps_per_second": 0.545,
+      "step": 40
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 400,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 34,
+  "save_steps": 20,
+  "total_flos": 2.2956006247602586e+17,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-40/training_args.bin b/checkpoint-40/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3f0d01d8ba12c6a725736cdf727e72ec03ea9a4f
--- /dev/null
+++ b/checkpoint-40/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:01be3c1366faeea704d7b18d02c117abdc170d0c96565a08a0f3ad9c5e7a123a
+size 4856
diff --git a/checkpoint-400/README.md b/checkpoint-400/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..4b6f787248182d62a16f6423f948c336352c3674
--- /dev/null
+++ b/checkpoint-400/README.md
@@ -0,0 +1,204 @@
+---
+library_name: peft
+base_model: bigcode/starcoder
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** bigcode/starcoder
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases, and limitations of the model. More information is needed before further recommendations can be made.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure 
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and BibTeX information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+
+
+### Framework versions
+
+- PEFT 0.8.2
\ No newline at end of file
diff --git a/checkpoint-400/adapter_config.json b/checkpoint-400/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..ab6c617ff1322585052700834abbc593bae7c619
--- /dev/null
+++ b/checkpoint-400/adapter_config.json
@@ -0,0 +1,28 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "bigcode/starcoder",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "c_proj",
+    "c_attn",
+    "q_attn"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-400/adapter_model.safetensors b/checkpoint-400/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75
--- /dev/null
+++ b/checkpoint-400/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41
+size 48
diff --git a/checkpoint-400/optimizer.pt b/checkpoint-400/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f063a64b8788731f471ef26ea29c4fb588d6a6a1
--- /dev/null
+++ b/checkpoint-400/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6135358af0dd648b51a5faf17d8f31c2e64885a4ac2282a2142e25937bbfc3b8
+size 284628602
diff --git a/checkpoint-400/rng_state.pth b/checkpoint-400/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..abe706932ca6879f53044735e0ae8d2a3e2ad561
--- /dev/null
+++ b/checkpoint-400/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d4fa90b42117e78621355d0b2d6ec35c92d5ce1d8a8e3fd4c6a78791b096ffd2
+size 14244
diff --git a/checkpoint-400/scheduler.pt b/checkpoint-400/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..62c7d6ea1655a58e9a5460c3034b46e354a34af4
--- /dev/null
+++ b/checkpoint-400/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ee665d99b8d4ac37b6829a57abd01a01763b04846f27bc645d525d70173d6821
+size 1064
diff --git a/checkpoint-400/trainer_state.json b/checkpoint-400/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..68537d37a7e8de364310b2b0e76f5f4e4d569a26
--- /dev/null
+++ b/checkpoint-400/trainer_state.json
@@ -0,0 +1,461 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 32.6530612244898,
+  "eval_steps": 20,
+  "global_step": 400,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.82,
+      "grad_norm": 0.18377472460269928,
+      "learning_rate": 2.9999999999999997e-05,
+      "loss": 1.861,
+      "step": 10
+    },
+    {
+      "epoch": 1.63,
+      "grad_norm": 0.35202744603157043,
+      "learning_rate": 5.9999999999999995e-05,
+      "loss": 1.7263,
+      "step": 20
+    },
+    {
+      "epoch": 1.63,
+      "eval_loss": 1.4457755088806152,
+      "eval_runtime": 89.8938,
+      "eval_samples_per_second": 4.305,
+      "eval_steps_per_second": 0.545,
+      "step": 20
+    },
+    {
+      "epoch": 2.45,
+      "grad_norm": 0.928983747959137,
+      "learning_rate": 8.999999999999999e-05,
+      "loss": 1.1718,
+      "step": 30
+    },
+    {
+      "epoch": 3.27,
+      "grad_norm": 0.253262996673584,
+      "learning_rate": 0.00011999999999999999,
+      "loss": 0.4789,
+      "step": 40
+    },
+    {
+      "epoch": 3.27,
+      "eval_loss": 0.3332095146179199,
+      "eval_runtime": 89.9804,
+      "eval_samples_per_second": 4.301,
+      "eval_steps_per_second": 0.545,
+      "step": 40
+    },
+    {
+      "epoch": 4.08,
+      "grad_norm": 0.12236642092466354,
+      "learning_rate": 0.00015,
+      "loss": 0.3568,
+      "step": 50
+    },
+    {
+      "epoch": 4.9,
+      "grad_norm": 0.09160923212766647,
+      "learning_rate": 0.00017999999999999998,
+      "loss": 0.3256,
+      "step": 60
+    },
+    {
+      "epoch": 4.9,
+      "eval_loss": 0.2753114104270935,
+      "eval_runtime": 90.3206,
+      "eval_samples_per_second": 4.285,
+      "eval_steps_per_second": 0.543,
+      "step": 60
+    },
+    {
+      "epoch": 5.71,
+      "grad_norm": 0.10242326557636261,
+      "learning_rate": 0.00020999999999999998,
+      "loss": 0.2841,
+      "step": 70
+    },
+    {
+      "epoch": 6.53,
+      "grad_norm": 0.1305350810289383,
+      "learning_rate": 0.00023999999999999998,
+      "loss": 0.2615,
+      "step": 80
+    },
+    {
+      "epoch": 6.53,
+      "eval_loss": 0.2476309835910797,
+      "eval_runtime": 90.525,
+      "eval_samples_per_second": 4.275,
+      "eval_steps_per_second": 0.541,
+      "step": 80
+    },
+    {
+      "epoch": 7.35,
+      "grad_norm": 0.17941106855869293,
+      "learning_rate": 0.00027,
+      "loss": 0.2216,
+      "step": 90
+    },
+    {
+      "epoch": 8.16,
+      "grad_norm": 0.20095375180244446,
+      "learning_rate": 0.0003,
+      "loss": 0.1832,
+      "step": 100
+    },
+    {
+      "epoch": 8.16,
+      "eval_loss": 0.2350914180278778,
+      "eval_runtime": 90.3919,
+      "eval_samples_per_second": 4.281,
+      "eval_steps_per_second": 0.542,
+      "step": 100
+    },
+    {
+      "epoch": 8.98,
+      "grad_norm": 0.2600422501564026,
+      "learning_rate": 0.00029,
+      "loss": 0.1441,
+      "step": 110
+    },
+    {
+      "epoch": 9.8,
+      "grad_norm": 0.20544037222862244,
+      "learning_rate": 0.00028,
+      "loss": 0.1186,
+      "step": 120
+    },
+    {
+      "epoch": 9.8,
+      "eval_loss": 0.23090216517448425,
+      "eval_runtime": 90.3144,
+      "eval_samples_per_second": 4.285,
+      "eval_steps_per_second": 0.543,
+      "step": 120
+    },
+    {
+      "epoch": 10.61,
+      "grad_norm": 0.2158157229423523,
+      "learning_rate": 0.00027,
+      "loss": 0.0947,
+      "step": 130
+    },
+    {
+      "epoch": 11.43,
+      "grad_norm": 0.18916285037994385,
+      "learning_rate": 0.00026,
+      "loss": 0.0768,
+      "step": 140
+    },
+    {
+      "epoch": 11.43,
+      "eval_loss": 0.24214179813861847,
+      "eval_runtime": 90.2597,
+      "eval_samples_per_second": 4.288,
+      "eval_steps_per_second": 0.543,
+      "step": 140
+    },
+    {
+      "epoch": 12.24,
+      "grad_norm": 0.22263498604297638,
+      "learning_rate": 0.00025,
+      "loss": 0.0615,
+      "step": 150
+    },
+    {
+      "epoch": 13.06,
+      "grad_norm": 0.21315976977348328,
+      "learning_rate": 0.00023999999999999998,
+      "loss": 0.054,
+      "step": 160
+    },
+    {
+      "epoch": 13.06,
+      "eval_loss": 0.25932466983795166,
+      "eval_runtime": 89.8439,
+      "eval_samples_per_second": 4.307,
+      "eval_steps_per_second": 0.545,
+      "step": 160
+    },
+    {
+      "epoch": 13.88,
+      "grad_norm": 0.18338361382484436,
+      "learning_rate": 0.00023,
+      "loss": 0.0455,
+      "step": 170
+    },
+    {
+      "epoch": 14.69,
+      "grad_norm": 0.17157459259033203,
+      "learning_rate": 0.00021999999999999995,
+      "loss": 0.0393,
+      "step": 180
+    },
+    {
+      "epoch": 14.69,
+      "eval_loss": 0.27233538031578064,
+      "eval_runtime": 90.1364,
+      "eval_samples_per_second": 4.293,
+      "eval_steps_per_second": 0.544,
+      "step": 180
+    },
+    {
+      "epoch": 15.51,
+      "grad_norm": 0.1541435867547989,
+      "learning_rate": 0.00020999999999999998,
+      "loss": 0.0352,
+      "step": 190
+    },
+    {
+      "epoch": 16.33,
+      "grad_norm": 0.1553652435541153,
+      "learning_rate": 0.00019999999999999998,
+      "loss": 0.0325,
+      "step": 200
+    },
+    {
+      "epoch": 16.33,
+      "eval_loss": 0.28704825043678284,
+      "eval_runtime": 89.7951,
+      "eval_samples_per_second": 4.31,
+      "eval_steps_per_second": 0.546,
+      "step": 200
+    },
+    {
+      "epoch": 17.14,
+      "grad_norm": 0.13403691351413727,
+      "learning_rate": 0.00018999999999999998,
+      "loss": 0.0297,
+      "step": 210
+    },
+    {
+      "epoch": 17.96,
+      "grad_norm": 0.14512716233730316,
+      "learning_rate": 0.00017999999999999998,
+      "loss": 0.0279,
+      "step": 220
+    },
+    {
+      "epoch": 17.96,
+      "eval_loss": 0.2964874505996704,
+      "eval_runtime": 89.7009,
+      "eval_samples_per_second": 4.314,
+      "eval_steps_per_second": 0.546,
+      "step": 220
+    },
+    {
+      "epoch": 18.78,
+      "grad_norm": 0.12400835007429123,
+      "learning_rate": 0.00016999999999999999,
+      "loss": 0.0263,
+      "step": 230
+    },
+    {
+      "epoch": 19.59,
+      "grad_norm": 0.1139909029006958,
+      "learning_rate": 0.00015999999999999999,
+      "loss": 0.0246,
+      "step": 240
+    },
+    {
+      "epoch": 19.59,
+      "eval_loss": 0.30519917607307434,
+      "eval_runtime": 89.8387,
+      "eval_samples_per_second": 4.308,
+      "eval_steps_per_second": 0.545,
+      "step": 240
+    },
+    {
+      "epoch": 20.41,
+      "grad_norm": 0.12317101657390594,
+      "learning_rate": 0.00015,
+      "loss": 0.0235,
+      "step": 250
+    },
+    {
+      "epoch": 21.22,
+      "grad_norm": 0.12494686245918274,
+      "learning_rate": 0.00014,
+      "loss": 0.0224,
+      "step": 260
+    },
+    {
+      "epoch": 21.22,
+      "eval_loss": 0.314134418964386,
+      "eval_runtime": 89.7974,
+      "eval_samples_per_second": 4.31,
+      "eval_steps_per_second": 0.546,
+      "step": 260
+    },
+    {
+      "epoch": 22.04,
+      "grad_norm": 0.1180659756064415,
+      "learning_rate": 0.00013,
+      "loss": 0.022,
+      "step": 270
+    },
+    {
+      "epoch": 22.86,
+      "grad_norm": 0.09653373062610626,
+      "learning_rate": 0.00011999999999999999,
+      "loss": 0.0212,
+      "step": 280
+    },
+    {
+      "epoch": 22.86,
+      "eval_loss": 0.3175604045391083,
+      "eval_runtime": 89.9764,
+      "eval_samples_per_second": 4.301,
+      "eval_steps_per_second": 0.545,
+      "step": 280
+    },
+    {
+      "epoch": 23.67,
+      "grad_norm": 0.10445748269557953,
+      "learning_rate": 0.00010999999999999998,
+      "loss": 0.0208,
+      "step": 290
+    },
+    {
+      "epoch": 24.49,
+      "grad_norm": 0.09245337545871735,
+      "learning_rate": 9.999999999999999e-05,
+      "loss": 0.0199,
+      "step": 300
+    },
+    {
+      "epoch": 24.49,
+      "eval_loss": 0.32360976934432983,
+      "eval_runtime": 89.8613,
+      "eval_samples_per_second": 4.307,
+      "eval_steps_per_second": 0.545,
+      "step": 300
+    },
+    {
+      "epoch": 25.31,
+      "grad_norm": 0.09468758851289749,
+      "learning_rate": 8.999999999999999e-05,
+      "loss": 0.0197,
+      "step": 310
+    },
+    {
+      "epoch": 26.12,
+      "grad_norm": 0.0891977846622467,
+      "learning_rate": 7.999999999999999e-05,
+      "loss": 0.0192,
+      "step": 320
+    },
+    {
+      "epoch": 26.12,
+      "eval_loss": 0.3267403841018677,
+      "eval_runtime": 90.3063,
+      "eval_samples_per_second": 4.285,
+      "eval_steps_per_second": 0.543,
+      "step": 320
+    },
+    {
+      "epoch": 26.94,
+      "grad_norm": 0.08574336767196655,
+      "learning_rate": 7e-05,
+      "loss": 0.0188,
+      "step": 330
+    },
+    {
+      "epoch": 27.76,
+      "grad_norm": 0.08517367392778397,
+      "learning_rate": 5.9999999999999995e-05,
+      "loss": 0.0184,
+      "step": 340
+    },
+    {
+      "epoch": 27.76,
+      "eval_loss": 0.33063870668411255,
+      "eval_runtime": 89.8041,
+      "eval_samples_per_second": 4.309,
+      "eval_steps_per_second": 0.546,
+      "step": 340
+    },
+    {
+      "epoch": 28.57,
+      "grad_norm": 0.08357132971286774,
+      "learning_rate": 4.9999999999999996e-05,
+      "loss": 0.0181,
+      "step": 350
+    },
+    {
+      "epoch": 29.39,
+      "grad_norm": 0.08679915964603424,
+      "learning_rate": 3.9999999999999996e-05,
+      "loss": 0.0181,
+      "step": 360
+    },
+    {
+      "epoch": 29.39,
+      "eval_loss": 0.3352932929992676,
+      "eval_runtime": 90.2438,
+      "eval_samples_per_second": 4.288,
+      "eval_steps_per_second": 0.543,
+      "step": 360
+    },
+    {
+      "epoch": 30.2,
+      "grad_norm": 0.07208231836557388,
+      "learning_rate": 2.9999999999999997e-05,
+      "loss": 0.0178,
+      "step": 370
+    },
+    {
+      "epoch": 31.02,
+      "grad_norm": 0.08611435443162918,
+      "learning_rate": 1.9999999999999998e-05,
+      "loss": 0.0175,
+      "step": 380
+    },
+    {
+      "epoch": 31.02,
+      "eval_loss": 0.338119238615036,
+      "eval_runtime": 90.2767,
+      "eval_samples_per_second": 4.287,
+      "eval_steps_per_second": 0.543,
+      "step": 380
+    },
+    {
+      "epoch": 31.84,
+      "grad_norm": 0.07755295187234879,
+      "learning_rate": 9.999999999999999e-06,
+      "loss": 0.0176,
+      "step": 390
+    },
+    {
+      "epoch": 32.65,
+      "grad_norm": 0.07367673516273499,
+      "learning_rate": 0.0,
+      "loss": 0.0173,
+      "step": 400
+    },
+    {
+      "epoch": 32.65,
+      "eval_loss": 0.3382853865623474,
+      "eval_runtime": 90.0437,
+      "eval_samples_per_second": 4.298,
+      "eval_steps_per_second": 0.544,
+      "step": 400
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 400,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 34,
+  "save_steps": 20,
+  "total_flos": 2.2944565601689928e+18,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-400/training_args.bin b/checkpoint-400/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3f0d01d8ba12c6a725736cdf727e72ec03ea9a4f
--- /dev/null
+++ b/checkpoint-400/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:01be3c1366faeea704d7b18d02c117abdc170d0c96565a08a0f3ad9c5e7a123a
+size 4856
diff --git a/checkpoint-60/README.md b/checkpoint-60/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..4b6f787248182d62a16f6423f948c336352c3674
--- /dev/null
+++ b/checkpoint-60/README.md
@@ -0,0 +1,204 @@
+---
+library_name: peft
+base_model: bigcode/starcoder
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** bigcode/starcoder
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases, and limitations of the model. More information is needed before further recommendations can be made.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure 
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and BibTeX information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+
+
+### Framework versions
+
+- PEFT 0.8.2
\ No newline at end of file
diff --git a/checkpoint-60/adapter_config.json b/checkpoint-60/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..ab6c617ff1322585052700834abbc593bae7c619
--- /dev/null
+++ b/checkpoint-60/adapter_config.json
@@ -0,0 +1,28 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "bigcode/starcoder",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "c_proj",
+    "c_attn",
+    "q_attn"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-60/adapter_model.safetensors b/checkpoint-60/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75
--- /dev/null
+++ b/checkpoint-60/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41
+size 48
diff --git a/checkpoint-60/optimizer.pt b/checkpoint-60/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9737729f3fd08f9033816836e4170e40afdf350b
--- /dev/null
+++ b/checkpoint-60/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3b95f1af5b76ccbe64193f2025017a1bcabe6da07c7696008de971d17369c5c0
+size 284628602
diff --git a/checkpoint-60/rng_state.pth b/checkpoint-60/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..ea19684ee32c082089d6be5faa1c380f5ab5e08b
--- /dev/null
+++ b/checkpoint-60/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4ae8652c2879fe670bb11f4dc2977badc0e05f30468a3d779c66a37c466dda75
+size 14244
diff --git a/checkpoint-60/scheduler.pt b/checkpoint-60/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ccc7f4b428abbeedfc6798ac0a82ddf117ba2dfc
--- /dev/null
+++ b/checkpoint-60/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:26d0406405dc2799f6a205a30a40ceac73b9e2fdb57b3e7109b27235b06006ef
+size 1064
diff --git a/checkpoint-60/trainer_state.json b/checkpoint-60/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..7590283f71046258b0022bb96dc9c4b1f3518b57
--- /dev/null
+++ b/checkpoint-60/trainer_state.json
@@ -0,0 +1,87 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 4.8979591836734695,
+  "eval_steps": 20,
+  "global_step": 60,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.82,
+      "grad_norm": 0.18377472460269928,
+      "learning_rate": 2.9999999999999997e-05,
+      "loss": 1.861,
+      "step": 10
+    },
+    {
+      "epoch": 1.63,
+      "grad_norm": 0.35202744603157043,
+      "learning_rate": 5.9999999999999995e-05,
+      "loss": 1.7263,
+      "step": 20
+    },
+    {
+      "epoch": 1.63,
+      "eval_loss": 1.4457755088806152,
+      "eval_runtime": 89.8938,
+      "eval_samples_per_second": 4.305,
+      "eval_steps_per_second": 0.545,
+      "step": 20
+    },
+    {
+      "epoch": 2.45,
+      "grad_norm": 0.928983747959137,
+      "learning_rate": 8.999999999999999e-05,
+      "loss": 1.1718,
+      "step": 30
+    },
+    {
+      "epoch": 3.27,
+      "grad_norm": 0.253262996673584,
+      "learning_rate": 0.00011999999999999999,
+      "loss": 0.4789,
+      "step": 40
+    },
+    {
+      "epoch": 3.27,
+      "eval_loss": 0.3332095146179199,
+      "eval_runtime": 89.9804,
+      "eval_samples_per_second": 4.301,
+      "eval_steps_per_second": 0.545,
+      "step": 40
+    },
+    {
+      "epoch": 4.08,
+      "grad_norm": 0.12236642092466354,
+      "learning_rate": 0.00015,
+      "loss": 0.3568,
+      "step": 50
+    },
+    {
+      "epoch": 4.9,
+      "grad_norm": 0.09160923212766647,
+      "learning_rate": 0.00017999999999999998,
+      "loss": 0.3256,
+      "step": 60
+    },
+    {
+      "epoch": 4.9,
+      "eval_loss": 0.2753114104270935,
+      "eval_runtime": 90.3206,
+      "eval_samples_per_second": 4.285,
+      "eval_steps_per_second": 0.543,
+      "step": 60
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 400,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 34,
+  "save_steps": 20,
+  "total_flos": 3.449704966520832e+17,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-60/training_args.bin b/checkpoint-60/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3f0d01d8ba12c6a725736cdf727e72ec03ea9a4f
--- /dev/null
+++ b/checkpoint-60/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:01be3c1366faeea704d7b18d02c117abdc170d0c96565a08a0f3ad9c5e7a123a
+size 4856
diff --git a/checkpoint-80/README.md b/checkpoint-80/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..4b6f787248182d62a16f6423f948c336352c3674
--- /dev/null
+++ b/checkpoint-80/README.md
@@ -0,0 +1,204 @@
+---
+library_name: peft
+base_model: bigcode/starcoder
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases, and limitations of the model. More information is needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and BibTeX information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+
+
+### Framework versions
+
+- PEFT 0.8.2
\ No newline at end of file
diff --git a/checkpoint-80/adapter_config.json b/checkpoint-80/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..ab6c617ff1322585052700834abbc593bae7c619
--- /dev/null
+++ b/checkpoint-80/adapter_config.json
@@ -0,0 +1,28 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "bigcode/starcoder",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "c_proj",
+    "c_attn",
+    "q_attn"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-80/adapter_model.safetensors b/checkpoint-80/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75
--- /dev/null
+++ b/checkpoint-80/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41
+size 48
diff --git a/checkpoint-80/optimizer.pt b/checkpoint-80/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..22b62039fd85cbe0ec0054d4e9988ceef8c28cbd
--- /dev/null
+++ b/checkpoint-80/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9c298e33138163e2a15ee5a6a6cc8f3e68c596077e4d00579ac36d42a3a18228
+size 284628602
diff --git a/checkpoint-80/rng_state.pth b/checkpoint-80/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..b71d91cd22e12bb910fa89a1496b2638c19590a4
--- /dev/null
+++ b/checkpoint-80/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5d18a6bfea687c87d90c470af6ca33dff658dae861a2666cb386ea47c46f0bb3
+size 14244
diff --git a/checkpoint-80/scheduler.pt b/checkpoint-80/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ee345e8a659f9d4e86e79b79c6e415ea95b6fa42
--- /dev/null
+++ b/checkpoint-80/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:21d82a70de51a4166c824e4076d761aaf8a8967df5c1cd7fdce99da5c3b5bc50
+size 1064
diff --git a/checkpoint-80/trainer_state.json b/checkpoint-80/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..7365bbb261e1c2e97b1a4d9e283d6b8471e0d2d4
--- /dev/null
+++ b/checkpoint-80/trainer_state.json
@@ -0,0 +1,109 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 6.530612244897959,
+  "eval_steps": 20,
+  "global_step": 80,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.82,
+      "grad_norm": 0.18377472460269928,
+      "learning_rate": 2.9999999999999997e-05,
+      "loss": 1.861,
+      "step": 10
+    },
+    {
+      "epoch": 1.63,
+      "grad_norm": 0.35202744603157043,
+      "learning_rate": 5.9999999999999995e-05,
+      "loss": 1.7263,
+      "step": 20
+    },
+    {
+      "epoch": 1.63,
+      "eval_loss": 1.4457755088806152,
+      "eval_runtime": 89.8938,
+      "eval_samples_per_second": 4.305,
+      "eval_steps_per_second": 0.545,
+      "step": 20
+    },
+    {
+      "epoch": 2.45,
+      "grad_norm": 0.928983747959137,
+      "learning_rate": 8.999999999999999e-05,
+      "loss": 1.1718,
+      "step": 30
+    },
+    {
+      "epoch": 3.27,
+      "grad_norm": 0.253262996673584,
+      "learning_rate": 0.00011999999999999999,
+      "loss": 0.4789,
+      "step": 40
+    },
+    {
+      "epoch": 3.27,
+      "eval_loss": 0.3332095146179199,
+      "eval_runtime": 89.9804,
+      "eval_samples_per_second": 4.301,
+      "eval_steps_per_second": 0.545,
+      "step": 40
+    },
+    {
+      "epoch": 4.08,
+      "grad_norm": 0.12236642092466354,
+      "learning_rate": 0.00015,
+      "loss": 0.3568,
+      "step": 50
+    },
+    {
+      "epoch": 4.9,
+      "grad_norm": 0.09160923212766647,
+      "learning_rate": 0.00017999999999999998,
+      "loss": 0.3256,
+      "step": 60
+    },
+    {
+      "epoch": 4.9,
+      "eval_loss": 0.2753114104270935,
+      "eval_runtime": 90.3206,
+      "eval_samples_per_second": 4.285,
+      "eval_steps_per_second": 0.543,
+      "step": 60
+    },
+    {
+      "epoch": 5.71,
+      "grad_norm": 0.10242326557636261,
+      "learning_rate": 0.00020999999999999998,
+      "loss": 0.2841,
+      "step": 70
+    },
+    {
+      "epoch": 6.53,
+      "grad_norm": 0.1305350810289383,
+      "learning_rate": 0.00023999999999999998,
+      "loss": 0.2615,
+      "step": 80
+    },
+    {
+      "epoch": 6.53,
+      "eval_loss": 0.2476309835910797,
+      "eval_runtime": 90.525,
+      "eval_samples_per_second": 4.275,
+      "eval_steps_per_second": 0.541,
+      "step": 80
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 400,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 34,
+  "save_steps": 20,
+  "total_flos": 4.593302592647332e+17,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-80/training_args.bin b/checkpoint-80/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3f0d01d8ba12c6a725736cdf727e72ec03ea9a4f
--- /dev/null
+++ b/checkpoint-80/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:01be3c1366faeea704d7b18d02c117abdc170d0c96565a08a0f3ad9c5e7a123a
+size 4856
diff --git a/runs/Feb21_13-08-44_nq0jxhxas9/events.out.tfevents.1708520927.nq0jxhxas9.2015.0 b/runs/Feb21_13-08-44_nq0jxhxas9/events.out.tfevents.1708520927.nq0jxhxas9.2015.0
new file mode 100644
index 0000000000000000000000000000000000000000..b5c22135d4f498ee366043a888fe2238605434bb
--- /dev/null
+++ b/runs/Feb21_13-08-44_nq0jxhxas9/events.out.tfevents.1708520927.nq0jxhxas9.2015.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:69773f591aabaa18531f775feb5941bdb18c98b05137a37f4a45da2770ec1850
+size 19459