not-lain/finetuned_mistral_on_ads

Browse files

Files changed (9) hide show

README.md +127 -0
adapter_config.json +34 -0
adapter_model.safetensors +3 -0
runs/Jun10_20-13-54_e20cea9fca81/events.out.tfevents.1718050435.e20cea9fca81.27782.1 +3 -0
special_tokens_map.json +24 -0
tokenizer.json +0 -0
tokenizer.model +3 -0
tokenizer_config.json +0 -0
training_args.bin +3 -0

README.md ADDED Viewed

	@@ -0,0 +1,127 @@

+---
+license: apache-2.0
+library_name: peft
+tags:
+- trl
+- sft
+- generated_from_trainer
+base_model: mistralai/Mistral-7B-Instruct-v0.3
+model-index:
+- name: finetuned_mistral_on_ads
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# finetuned_mistral_on_ads
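+This model is a fine-tuned version of [mistralai/Mistral-7B-Instruct-v0.3](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3). The training dataset was not recorded by the Trainer; the repository name suggests advertising text, but this should be confirmed and the dataset named here.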
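+It achieves the following results on the evaluation set at the last logged evaluation (step 134, epoch 2.9778):
+- Loss: 1.5249 (the lowest validation loss during the run was 1.5222, at step 106)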
+## Model description
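+A LoRA adapter (PEFT) for the causal language model `mistralai/Mistral-7B-Instruct-v0.3`, trained with supervised fine-tuning (TRL `sft`). The adapter uses rank `r=8`, `lora_alpha=8`, `lora_dropout=0.0`, and no bias training, and targets the `q_proj`, `k_proj`, `v_proj`, `o_proj`, `gate_proj`, `up_proj`, and `down_proj` modules. The base model weights are not included and must be loaded separately. Further details (purpose, prompt format, language) still need to be added.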
+## Intended uses & limitations
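+More information needed on intended uses. Known caveats from this commit: (1) the `adapter_model.safetensors` pointer reports a size of 48 bytes, which is too small to hold LoRA weights for the seven targeted module types, so the uploaded adapter appears to contain no tensors and should be verified or re-uploaded before use; (2) validation loss plateaus at about 1.52 from step 100 onward while training loss continues to fall to about 1.14, so additional epochs are unlikely to help and may overfit.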
+## Training and evaluation data
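+More information needed: the dataset name, source, and train/evaluation split are not documented. The training log shows 45 optimizer steps per epoch (step 90 corresponds to epoch 2.0) with a train batch size of 2, which suggests a small training set on the order of 90 examples, assuming a single device and no gradient accumulation (to be confirmed).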
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 5e-05
+- train_batch_size: 2
+- eval_batch_size: 2
+- seed: 42
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: linear
+- num_epochs: 3
+### Training results
+| Training Loss | Epoch  | Step | Validation Loss |
+|:-------------:|:------:|:----:|:---------------:|
+| 3.7417        | 0.0444 | 2    | 3.6685          |
+| 3.6314        | 0.0889 | 4    | 3.2304          |
+| 3.0686        | 0.1333 | 6    | 2.8771          |
+| 2.5057        | 0.1778 | 8    | 2.7170          |
+| 2.5453        | 0.2222 | 10   | 2.5886          |
+| 2.5759        | 0.2667 | 12   | 2.4625          |
+| 2.4252        | 0.3111 | 14   | 2.3477          |
+| 2.4227        | 0.3556 | 16   | 2.2455          |
+| 1.987         | 0.4    | 18   | 2.1370          |
+| 2.0229        | 0.4444 | 20   | 2.0484          |
+| 2.0755        | 0.4889 | 22   | 1.9746          |
+| 1.9004        | 0.5333 | 24   | 1.9032          |
+| 1.9381        | 0.5778 | 26   | 1.8405          |
+| 1.7879        | 0.6222 | 28   | 1.7911          |
+| 1.7544        | 0.6667 | 30   | 1.7584          |
+| 1.7485        | 0.7111 | 32   | 1.7290          |
+| 1.6927        | 0.7556 | 34   | 1.7030          |
+| 1.8931        | 0.8    | 36   | 1.6825          |
+| 1.5624        | 0.8444 | 38   | 1.6656          |
+| 1.7061        | 0.8889 | 40   | 1.6528          |
+| 1.7288        | 0.9333 | 42   | 1.6426          |
+| 1.7839        | 0.9778 | 44   | 1.6347          |
+| 1.5954        | 1.0222 | 46   | 1.6270          |
+| 1.4288        | 1.0667 | 48   | 1.6177          |
+| 1.5201        | 1.1111 | 50   | 1.6094          |
+| 1.5281        | 1.1556 | 52   | 1.6037          |
+| 1.4132        | 1.2    | 54   | 1.5998          |
+| 1.4271        | 1.2444 | 56   | 1.5976          |
+| 1.4778        | 1.2889 | 58   | 1.5952          |
+| 1.5138        | 1.3333 | 60   | 1.5921          |
+| 1.4539        | 1.3778 | 62   | 1.5875          |
+| 1.4293        | 1.4222 | 64   | 1.5823          |
+| 1.3673        | 1.4667 | 66   | 1.5773          |
+| 1.5272        | 1.5111 | 68   | 1.5734          |
+| 1.506         | 1.5556 | 70   | 1.5701          |
+| 1.2929        | 1.6    | 72   | 1.5669          |
+| 1.387         | 1.6444 | 74   | 1.5637          |
+| 1.3375        | 1.6889 | 76   | 1.5609          |
+| 1.4666        | 1.7333 | 78   | 1.5586          |
+| 1.2295        | 1.7778 | 80   | 1.5553          |
+| 1.5195        | 1.8222 | 82   | 1.5521          |
+| 1.5116        | 1.8667 | 84   | 1.5488          |
+| 1.2947        | 1.9111 | 86   | 1.5449          |
+| 1.4651        | 1.9556 | 88   | 1.5399          |
+| 1.5171        | 2.0    | 90   | 1.5351          |
+| 1.1823        | 2.0444 | 92   | 1.5312          |
+| 1.3729        | 2.0889 | 94   | 1.5286          |
+| 1.2607        | 2.1333 | 96   | 1.5256          |
+| 1.2048        | 2.1778 | 98   | 1.5237          |
+| 1.2862        | 2.2222 | 100  | 1.5229          |
+| 1.2584        | 2.2667 | 102  | 1.5224          |
+| 1.2285        | 2.3111 | 104  | 1.5223          |
+| 1.2794        | 2.3556 | 106  | 1.5222          |
+| 1.2196        | 2.4    | 108  | 1.5227          |
+| 1.2526        | 2.4444 | 110  | 1.5232          |
+| 1.2876        | 2.4889 | 112  | 1.5237          |
+| 1.1812        | 2.5333 | 114  | 1.5247          |
+| 1.3622        | 2.5778 | 116  | 1.5255          |
+| 1.229         | 2.6222 | 118  | 1.5261          |
+| 1.2796        | 2.6667 | 120  | 1.5262          |
+| 1.2059        | 2.7111 | 122  | 1.5258          |
+| 1.3327        | 2.7556 | 124  | 1.5257          |
+| 1.254         | 2.8    | 126  | 1.5257          |
+| 1.2183        | 2.8444 | 128  | 1.5256          |
+| 1.1979        | 2.8889 | 130  | 1.5254          |
+| 1.2558        | 2.9333 | 132  | 1.5251          |
+| 1.1405        | 2.9778 | 134  | 1.5249          |
+### Framework versions
+- PEFT 0.11.1
+- Transformers 4.41.2
+- Pytorch 2.3.0+cu121
+- Datasets 2.19.2
+- Tokenizers 0.19.1

adapter_config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.3",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 8,
+  "lora_dropout": 0.0,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "o_proj",
+    "gate_proj",
+    "v_proj",
+    "k_proj",
+    "q_proj",
+    "down_proj",
+    "up_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41
+size 48

runs/Jun10_20-13-54_e20cea9fca81/events.out.tfevents.1718050435.e20cea9fca81.27782.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e999713d7cc2f3a9f810a192873924ae5ca07156ec9fe46ef37e7fdc07f9e05e
+size 37683

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "</s>",
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:37f00374dea48658ee8f5d0f21895b9bc55cb0103939607c8185bfd1c6ca1f89
+size 587404

tokenizer_config.json ADDED Viewed

The diff for this file is too large to render. See raw diff

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b5183451f72280934077151f5dc3d57f8ebb0b29b638ec6f1c43241a3ea6f5eb
+size 5368