llama7b-ft-lora-sql-v2adapters

Files changed:
- README.md (+25 -15)
- adapter_config.json (+6 -6)
- training_args.bin (+1 -1)
README.md CHANGED

@@ -19,7 +19,7 @@ should probably proofread and complete it, then remove this comment. -->
 
 This model is a fine-tuned version of [meta-llama/Llama-2-7b-hf](https://huggingface.co/meta-llama/Llama-2-7b-hf) on the generator dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.
+- Loss: 0.3700
 
 ## Model description
 
@@ -38,29 +38,39 @@
 ### Training hyperparameters
 
 The following hyperparameters were used during training:
-- learning_rate: 0.
-- train_batch_size:
+- learning_rate: 0.0003
+- train_batch_size: 8
 - eval_batch_size: 8
 - seed: 1399
-- gradient_accumulation_steps:
+- gradient_accumulation_steps: 4
 - total_train_batch_size: 32
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
-- lr_scheduler_type:
-- lr_scheduler_warmup_steps:
-- training_steps:
+- lr_scheduler_type: cosine
+- lr_scheduler_warmup_steps: 100
+- training_steps: 500
 
 ### Training results
 
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
-| 1.
-|
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
+| 1.2068        | 0.06  | 20   | 0.8181          |
+| 0.6757        | 0.12  | 40   | 0.5148          |
+| 0.5104        | 0.17  | 60   | 0.4552          |
+| 0.4633        | 0.23  | 80   | 0.4269          |
+| 0.442         | 0.29  | 100  | 0.4110          |
+| 0.428         | 0.35  | 120  | 0.3993          |
+| 0.4209        | 0.41  | 140  | 0.3983          |
+| 0.4142        | 0.47  | 160  | 0.3932          |
+| 0.4032        | 0.52  | 180  | 0.3888          |
+| 0.3999        | 0.58  | 200  | 0.3841          |
+| 0.3977        | 0.64  | 220  | 0.3827          |
+| 0.397         | 0.7   | 240  | 0.3811          |
+| 0.3927        | 0.76  | 260  | 0.3781          |
+| 0.3873        | 0.82  | 280  | 0.3762          |
+| 0.3871        | 0.87  | 300  | 0.3728          |
+| 0.3861        | 0.93  | 320  | 0.3715          |
+| 0.3809        | 0.99  | 340  | 0.3695          |
+| 0.3664        | 1.05  | 360  | 0.3700          |
 
 
 ### Framework versions
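The updated card pins down the run's hyperparameters but does not include the training script. As a rough guide, here is a minimal sketch of how those values map onto `transformers.TrainingArguments`, assuming the standard `Trainer` API was used; `output_dir` is a placeholder, and note that `total_train_batch_size: 32` simply falls out of `train_batch_size` 8 × `gradient_accumulation_steps` 4.

```python
# Minimal sketch: the README's hyperparameters expressed as
# transformers.TrainingArguments. The actual training script is not
# published with this card, so output_dir is a placeholder.
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="llama7b-ft-lora-sql-v2adapters",  # placeholder
    learning_rate=3e-4,              # learning_rate: 0.0003
    per_device_train_batch_size=8,   # train_batch_size: 8
    per_device_eval_batch_size=8,    # eval_batch_size: 8
    seed=1399,
    gradient_accumulation_steps=4,   # 8 * 4 = total_train_batch_size 32
    lr_scheduler_type="cosine",
    warmup_steps=100,                # lr_scheduler_warmup_steps: 100
    max_steps=500,                   # training_steps: 500
)
# Adam with betas=(0.9, 0.999) and epsilon=1e-08 matches the library's
# default optimizer settings, so no extra optimizer arguments are needed.
```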
adapter_config.json CHANGED

@@ -10,22 +10,22 @@
 "layers_to_transform": null,
 "loftq_config": {},
 "lora_alpha": 32,
-"lora_dropout": 0.
+"lora_dropout": 0.05,
 "megatron_config": null,
 "megatron_core": "megatron.core",
 "modules_to_save": null,
 "peft_type": "LORA",
-"r":
+"r": 16,
 "rank_pattern": {},
 "revision": null,
 "target_modules": [
-"
+"q_proj",
 "gate_proj",
-"v_proj",
 "k_proj",
-"
+"v_proj",
 "up_proj",
-"
+"down_proj",
+"o_proj"
 ],
 "task_type": "CAUSAL_LM",
 "use_dora": false,
training_args.bin CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:7b265d2440bdc7920eda82925c8b22f165e59e302b2125d6ccacac9ccb3a3924
 size 4920
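Since training_args.bin is only a Git LFS pointer to the serialized training arguments, the practical entry point for this repo is loading the adapter weights on top of the base model named in the README. A minimal inference sketch, assuming a standard `peft` setup; the hub repo id below is a placeholder for wherever these adapters are hosted.

```python
# Minimal sketch: attach this repo's LoRA adapter to the base model.
# "your-namespace/llama7b-ft-lora-sql-v2adapters" is a placeholder id.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf",
    torch_dtype=torch.float16,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")

model = PeftModel.from_pretrained(base, "your-namespace/llama7b-ft-lora-sql-v2adapters")
model.eval()
```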