ihughes15234/adapter-phi-3-mini-500_combined_3

Browse files

Files changed (5) hide show

README.md +21 -33
adapter_config.json +5 -5
adapter_model.safetensors +1 -1
tokenizer.json +1 -1
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -18,7 +18,7 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [microsoft/Phi-3-mini-4k-instruct](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.0407
 ## Model description
@@ -37,7 +37,7 @@ More information needed
 ### Training hyperparameters
 The following hyperparameters were used during training:
-- learning_rate: 0.0001
 - train_batch_size: 4
 - eval_batch_size: 4
 - seed: 42
@@ -45,42 +45,30 @@ The following hyperparameters were used during training:
 - total_train_batch_size: 16
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
-- lr_scheduler_warmup_ratio: 0.1
-- num_epochs: 5
 ### Training results
 | Training Loss | Epoch  | Step | Validation Loss |
 |:-------------:|:------:|:----:|:---------------:|
-| 1.7043        | 0.1684 | 50   | 1.6293          |
-| 1.2795        | 0.3367 | 100  | 0.8934          |
-| 0.5263        | 0.5051 | 150  | 0.1646          |
-| 0.0889        | 0.6734 | 200  | 0.0631          |
-| 0.0561        | 0.8418 | 250  | 0.0550          |
-| 0.0527        | 1.0101 | 300  | 0.0523          |
-| 0.0493        | 1.1785 | 350  | 0.0505          |
-| 0.048         | 1.3468 | 400  | 0.0487          |
-| 0.0462        | 1.5152 | 450  | 0.0478          |
-| 0.0482        | 1.6835 | 500  | 0.0465          |
-| 0.0454        | 1.8519 | 550  | 0.0455          |
-| 0.0443        | 2.0202 | 600  | 0.0459          |
-| 0.0451        | 2.1886 | 650  | 0.0455          |
-| 0.0447        | 2.3569 | 700  | 0.0446          |
-| 0.0413        | 2.5253 | 750  | 0.0441          |
-| 0.0423        | 2.6936 | 800  | 0.0442          |
-| 0.0435        | 2.8620 | 850  | 0.0437          |
-| 0.0411        | 3.0303 | 900  | 0.0432          |
-| 0.0382        | 3.1987 | 950  | 0.0431          |
-| 0.0413        | 3.3670 | 1000 | 0.0427          |
-| 0.0426        | 3.5354 | 1050 | 0.0429          |
-| 0.0408        | 3.7037 | 1100 | 0.0420          |
-| 0.0412        | 3.8721 | 1150 | 0.0418          |
-| 0.0377        | 4.0404 | 1200 | 0.0413          |
-| 0.0399        | 4.2088 | 1250 | 0.0415          |
-| 0.0378        | 4.3771 | 1300 | 0.0413          |
-| 0.037         | 4.5455 | 1350 | 0.0410          |
-| 0.0418        | 4.7138 | 1400 | 0.0407          |
-| 0.0391        | 4.8822 | 1450 | 0.0407          |
 ### Framework versions

 This model is a fine-tuned version of [microsoft/Phi-3-mini-4k-instruct](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct) on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.3597
 ## Model description
 ### Training hyperparameters
 The following hyperparameters were used during training:
+- learning_rate: 1e-05
 - train_batch_size: 4
 - eval_batch_size: 4
 - seed: 42
 - total_train_batch_size: 16
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
+- lr_scheduler_warmup_ratio: 0.2
+- num_epochs: 3
 ### Training results
 | Training Loss | Epoch  | Step | Validation Loss |
 |:-------------:|:------:|:----:|:---------------:|
+| 1.761         | 0.1684 | 50   | 1.8291          |
+| 1.751         | 0.3367 | 100  | 1.7497          |
+| 1.6426        | 0.5051 | 150  | 1.6412          |
+| 1.5228        | 0.6734 | 200  | 1.4889          |
+| 1.3624        | 0.8418 | 250  | 1.2639          |
+| 1.126         | 1.0101 | 300  | 1.0570          |
+| 0.9809        | 1.1785 | 350  | 0.9361          |
+| 0.8824        | 1.3468 | 400  | 0.8499          |
+| 0.7793        | 1.5152 | 450  | 0.7608          |
+| 0.7179        | 1.6835 | 500  | 0.6796          |
+| 0.6469        | 1.8519 | 550  | 0.6057          |
+| 0.5654        | 2.0202 | 600  | 0.5418          |
+| 0.5096        | 2.1886 | 650  | 0.4859          |
+| 0.4625        | 2.3569 | 700  | 0.4365          |
+| 0.432         | 2.5253 | 750  | 0.3976          |
+| 0.3866        | 2.6936 | 800  | 0.3732          |
+| 0.3725        | 2.8620 | 850  | 0.3597          |
 ### Framework versions

adapter_config.json CHANGED Viewed

@@ -20,13 +20,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "o_proj",
-    "gate_proj",
-    "down_proj",
     "v_proj",
-    "up_proj",
     "k_proj",
-    "q_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "v_proj",
     "k_proj",
+    "up_proj",
+    "q_proj",
+    "gate_proj",
+    "o_proj",
+    "down_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aec11a4abbe157d1c78c298d4bf286ea5293bbb68381a8a4a815dd2c1bed1f7a
 size 35668592

 version https://git-lfs.github.com/spec/v1
+oid sha256:25b5ff67de71aad7b8738b02f01f29da452e70be0b65799e51b1f86b55789033
 size 35668592

tokenizer.json CHANGED Viewed

@@ -2,7 +2,7 @@
   "version": "1.0",
   "truncation": {
     "direction": "Right",
-    "max_length": 1024,
     "strategy": "LongestFirst",
     "stride": 0
   },

   "version": "1.0",
   "truncation": {
     "direction": "Right",
+    "max_length": 2048,
     "strategy": "LongestFirst",
     "stride": 0
   },

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:05379fbffd1bbe6e3d3d99c8f29c3d067a8ceb28a47b952147c7a7356d36db87
 size 5432

 version https://git-lfs.github.com/spec/v1
+oid sha256:ca4861df2247dd78029c41574a03528f12104d588aabeff4c3b6f35434318fa0
 size 5432