not-lain committed on
Commit f06d315
1 Parent(s): 3f119f8

not-lain/finetuned_mistral_on_ads

README.md ADDED

---
license: apache-2.0
library_name: peft
tags:
- trl
- sft
- generated_from_trainer
base_model: mistralai/Mistral-7B-Instruct-v0.3
model-index:
- name: finetuned_mistral_on_ads
  results: []
---

<!-- This model card has been generated automatically according to the information the Trainer had access to. You
should probably proofread and complete it, then remove this comment. -->

# finetuned_mistral_on_ads

This model is a fine-tuned version of [mistralai/Mistral-7B-Instruct-v0.3](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3) on an unknown dataset.
It achieves the following results on the evaluation set:
- Loss: 1.5249
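
Since this repository ships only a LoRA adapter (see adapter_config.json below), here is a minimal usage sketch, assuming the adapter lives at `not-lain/finetuned_mistral_on_ads` and the base model is accessible; the prompt is illustrative only:

```python
# Minimal sketch: load the base model, then attach this repo's LoRA adapter.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-Instruct-v0.3",
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained("not-lain/finetuned_mistral_on_ads")
model = PeftModel.from_pretrained(base, "not-lain/finetuned_mistral_on_ads")

# Illustrative prompt; Mistral-Instruct models expect the [INST] ... [/INST] format.
prompt = "[INST] Write a short ad for a coffee shop. [/INST]"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
out = model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(out[0], skip_special_tokens=True))
```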

## Model description

More information needed

## Intended uses & limitations

More information needed

## Training and evaluation data

More information needed

## Training procedure

### Training hyperparameters

The following hyperparameters were used during training:
- learning_rate: 5e-05
- train_batch_size: 2
- eval_batch_size: 2
- seed: 42
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
- lr_scheduler_type: linear
- num_epochs: 3
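
A sketch of how these hyperparameters map onto a TRL `SFTTrainer` run (the `trl`/`sft` tags suggest this trainer produced the card). The dataset path and text column are placeholders; only the numeric values come from the card, and the Adam betas/epsilon above match the defaults:

```python
# Sketch, not the author's script: logged hyperparameters wired into SFTTrainer.
from datasets import load_dataset
from transformers import TrainingArguments
from trl import SFTTrainer

dataset = load_dataset("path/to/ads_dataset")  # placeholder: actual dataset unknown

args = TrainingArguments(
    output_dir="finetuned_mistral_on_ads",
    learning_rate=5e-5,             # logged learning_rate
    per_device_train_batch_size=2,  # logged train_batch_size
    per_device_eval_batch_size=2,   # logged eval_batch_size
    seed=42,
    lr_scheduler_type="linear",
    num_train_epochs=3,
)

trainer = SFTTrainer(
    model="mistralai/Mistral-7B-Instruct-v0.3",  # SFTTrainer can load from a model id
    args=args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    dataset_text_field="text",      # placeholder column name
)
trainer.train()
```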

### Training results

| Training Loss | Epoch | Step | Validation Loss |
|:-------------:|:------:|:----:|:---------------:|
| 3.7417 | 0.0444 | 2 | 3.6685 |
| 3.6314 | 0.0889 | 4 | 3.2304 |
| 3.0686 | 0.1333 | 6 | 2.8771 |
| 2.5057 | 0.1778 | 8 | 2.7170 |
| 2.5453 | 0.2222 | 10 | 2.5886 |
| 2.5759 | 0.2667 | 12 | 2.4625 |
| 2.4252 | 0.3111 | 14 | 2.3477 |
| 2.4227 | 0.3556 | 16 | 2.2455 |
| 1.987 | 0.4 | 18 | 2.1370 |
| 2.0229 | 0.4444 | 20 | 2.0484 |
| 2.0755 | 0.4889 | 22 | 1.9746 |
| 1.9004 | 0.5333 | 24 | 1.9032 |
| 1.9381 | 0.5778 | 26 | 1.8405 |
| 1.7879 | 0.6222 | 28 | 1.7911 |
| 1.7544 | 0.6667 | 30 | 1.7584 |
| 1.7485 | 0.7111 | 32 | 1.7290 |
| 1.6927 | 0.7556 | 34 | 1.7030 |
| 1.8931 | 0.8 | 36 | 1.6825 |
| 1.5624 | 0.8444 | 38 | 1.6656 |
| 1.7061 | 0.8889 | 40 | 1.6528 |
| 1.7288 | 0.9333 | 42 | 1.6426 |
| 1.7839 | 0.9778 | 44 | 1.6347 |
| 1.5954 | 1.0222 | 46 | 1.6270 |
| 1.4288 | 1.0667 | 48 | 1.6177 |
| 1.5201 | 1.1111 | 50 | 1.6094 |
| 1.5281 | 1.1556 | 52 | 1.6037 |
| 1.4132 | 1.2 | 54 | 1.5998 |
| 1.4271 | 1.2444 | 56 | 1.5976 |
| 1.4778 | 1.2889 | 58 | 1.5952 |
| 1.5138 | 1.3333 | 60 | 1.5921 |
| 1.4539 | 1.3778 | 62 | 1.5875 |
| 1.4293 | 1.4222 | 64 | 1.5823 |
| 1.3673 | 1.4667 | 66 | 1.5773 |
| 1.5272 | 1.5111 | 68 | 1.5734 |
| 1.506 | 1.5556 | 70 | 1.5701 |
| 1.2929 | 1.6 | 72 | 1.5669 |
| 1.387 | 1.6444 | 74 | 1.5637 |
| 1.3375 | 1.6889 | 76 | 1.5609 |
| 1.4666 | 1.7333 | 78 | 1.5586 |
| 1.2295 | 1.7778 | 80 | 1.5553 |
| 1.5195 | 1.8222 | 82 | 1.5521 |
| 1.5116 | 1.8667 | 84 | 1.5488 |
| 1.2947 | 1.9111 | 86 | 1.5449 |
| 1.4651 | 1.9556 | 88 | 1.5399 |
| 1.5171 | 2.0 | 90 | 1.5351 |
| 1.1823 | 2.0444 | 92 | 1.5312 |
| 1.3729 | 2.0889 | 94 | 1.5286 |
| 1.2607 | 2.1333 | 96 | 1.5256 |
| 1.2048 | 2.1778 | 98 | 1.5237 |
| 1.2862 | 2.2222 | 100 | 1.5229 |
| 1.2584 | 2.2667 | 102 | 1.5224 |
| 1.2285 | 2.3111 | 104 | 1.5223 |
| 1.2794 | 2.3556 | 106 | 1.5222 |
| 1.2196 | 2.4 | 108 | 1.5227 |
| 1.2526 | 2.4444 | 110 | 1.5232 |
| 1.2876 | 2.4889 | 112 | 1.5237 |
| 1.1812 | 2.5333 | 114 | 1.5247 |
| 1.3622 | 2.5778 | 116 | 1.5255 |
| 1.229 | 2.6222 | 118 | 1.5261 |
| 1.2796 | 2.6667 | 120 | 1.5262 |
| 1.2059 | 2.7111 | 122 | 1.5258 |
| 1.3327 | 2.7556 | 124 | 1.5257 |
| 1.254 | 2.8 | 126 | 1.5257 |
| 1.2183 | 2.8444 | 128 | 1.5256 |
| 1.1979 | 2.8889 | 130 | 1.5254 |
| 1.2558 | 2.9333 | 132 | 1.5251 |
| 1.1405 | 2.9778 | 134 | 1.5249 |

### Framework versions

- PEFT 0.11.1
- Transformers 4.41.2
- Pytorch 2.3.0+cu121
- Datasets 2.19.2
- Tokenizers 0.19.1

adapter_config.json ADDED

{
  "alpha_pattern": {},
  "auto_mapping": null,
  "base_model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.3",
  "bias": "none",
  "fan_in_fan_out": false,
  "inference_mode": true,
  "init_lora_weights": true,
  "layer_replication": null,
  "layers_pattern": null,
  "layers_to_transform": null,
  "loftq_config": {},
  "lora_alpha": 8,
  "lora_dropout": 0.0,
  "megatron_config": null,
  "megatron_core": "megatron.core",
  "modules_to_save": null,
  "peft_type": "LORA",
  "r": 8,
  "rank_pattern": {},
  "revision": null,
  "target_modules": [
    "o_proj",
    "gate_proj",
    "v_proj",
    "k_proj",
    "q_proj",
    "down_proj",
    "up_proj"
  ],
  "task_type": "CAUSAL_LM",
  "use_dora": false,
  "use_rslora": false
}
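
This adapter configuration corresponds roughly to the following PEFT `LoraConfig`; a reconstruction from the JSON above, not the author's training script:

```python
# Reconstructed from adapter_config.json; not the author's original code.
from peft import LoraConfig

lora_config = LoraConfig(
    r=8,
    lora_alpha=8,
    lora_dropout=0.0,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",  # attention projections
        "gate_proj", "up_proj", "down_proj",     # MLP projections
    ],
)
```
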
adapter_model.safetensors ADDED

version https://git-lfs.github.com/spec/v1
oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41
size 48

runs/Jun10_20-13-54_e20cea9fca81/events.out.tfevents.1718050435.e20cea9fca81.27782.1 ADDED

version https://git-lfs.github.com/spec/v1
oid sha256:e999713d7cc2f3a9f810a192873924ae5ca07156ec9fe46ef37e7fdc07f9e05e
size 37683

special_tokens_map.json ADDED

{
  "bos_token": {
    "content": "<s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "eos_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": "</s>",
  "unk_token": {
    "content": "<unk>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
}
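
Note that pad_token is mapped to the EOS token "</s>". A common way this arises during SFT is reusing EOS for padding, since Mistral's tokenizer ships without a dedicated pad token; this is an assumption about how it was set here, not something the repo confirms:

```python
# Assumed setup (not confirmed by the repo): reuse EOS as the padding token.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.3")
tokenizer.pad_token = tokenizer.eos_token  # pad_token becomes "</s>"
```
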
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED

version https://git-lfs.github.com/spec/v1
oid sha256:37f00374dea48658ee8f5d0f21895b9bc55cb0103939607c8185bfd1c6ca1f89
size 587404

tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff
 
training_args.bin ADDED

version https://git-lfs.github.com/spec/v1
oid sha256:b5183451f72280934077151f5dc3d57f8ebb0b29b638ec6f1c43241a3ea6f5eb
size 5368