CorticalStack committed
Commit 5ee0a55 · 1 parent: 7612ffe

Upload folder using huggingface_hub
Files changed:
- README.md +8 -12
- config.json +2 -2
- mergekit_config.yml +6 -6
- model-00001-of-00008.safetensors +1 -1
- model-00002-of-00008.safetensors +1 -1
- model-00003-of-00008.safetensors +1 -1
- model-00004-of-00008.safetensors +1 -1
- model-00005-of-00008.safetensors +1 -1
- model-00006-of-00008.safetensors +1 -1
- model-00007-of-00008.safetensors +1 -1
- model-00008-of-00008.safetensors +1 -1
- special_tokens_map.json +1 -6
- tokenizer_config.json +5 -11
README.md CHANGED

@@ -3,20 +3,16 @@ license: apache-2.0
 tags:
 - merge
 - mergekit
-- mlabonne/NeuralMonarch-7B
 - mlabonne/AlphaMonarch-7B
--
+- mlabonne/NeuralMonarch-7B
 - macadeliccc/MBX-7B-v3-DPO
 ---
 
-<img src="pastiche-crown-clown.png" alt="Pastiche crown clown logo" width="800" style="margin-left:'auto' margin-right:'auto' display:'block'"/>
-
 # pastiche-crown-clown-7B-dare
 
 pastiche-crown-clown-7B-dare is a DARE merge of the following models using [mergekit](https://github.com/cg123/mergekit):
-* [mlabonne/NeuralMonarch-7B](https://huggingface.co/mlabonne/NeuralMonarch-7B)
 * [mlabonne/AlphaMonarch-7B](https://huggingface.co/mlabonne/AlphaMonarch-7B)
-* [
+* [mlabonne/NeuralMonarch-7B](https://huggingface.co/mlabonne/NeuralMonarch-7B)
 * [macadeliccc/MBX-7B-v3-DPO](https://huggingface.co/macadeliccc/MBX-7B-v3-DPO)
 
 See the paper [Language Models are Super Mario: Absorbing Abilities from Homologous Models as a Free Lunch](https://arxiv.org/abs/2311.03099) for more on the method.

@@ -25,22 +21,22 @@ See the paper [Language Models are Super Mario: Absorbing Abilities from Homolog
 
 ```yaml
 models:
-  - model:
+  - model: bardsai/jaskier-7b-dpo-v5.6
     # No parameters necessary for base model
   - model: mlabonne/AlphaMonarch-7B
     parameters:
       density: 0.53
-      weight: 0.
+      weight: 0.2
-  - model:
+  - model: mlabonne/NeuralMonarch-7B
     parameters:
       density: 0.53
-      weight: 0.
+      weight: 0.4
   - model: macadeliccc/MBX-7B-v3-DPO
     parameters:
       density: 0.53
-      weight: 0.
+      weight: 0.4
 merge_method: dare_ties
-base_model:
+base_model: bardsai/jaskier-7b-dpo-v5.6
 parameters:
   int8_mask: true
 dtype: bfloat16
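For intuition about the `density: 0.53` setting above: DARE sparsifies each model's delta from the base model at random and rescales the surviving entries so the expected update is unchanged, before the weighted sum onto the base. Below is a minimal illustrative sketch of that one step, not mergekit's actual implementation (its `dare_ties` method additionally applies TIES-style sign election when combining deltas):

```python
import torch

def dare_drop_and_rescale(delta: torch.Tensor, density: float) -> torch.Tensor:
    """Keep each delta parameter with probability `density`, rescale survivors by 1/density.

    Illustrative sketch of the DARE step (arXiv:2311.03099): dropping and rescaling
    keeps the expected value of the delta unchanged while making it sparse.
    """
    mask = (torch.rand_like(delta) < density).to(delta.dtype)
    return delta * mask / density

# delta = finetuned_weight - base_weight; with density 0.53 as in the config above,
# roughly 47% of each delta is zeroed before the weighted merge onto the base model.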
config.json CHANGED

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "
+  "_name_or_path": "bardsai/jaskier-7b-dpo-v5.6",
   "architectures": [
     "MistralForCausalLM"
   ],
@@ -20,7 +20,7 @@
   "sliding_window": 4096,
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
-  "transformers_version": "4.
+  "transformers_version": "4.38.1",
   "use_cache": true,
   "vocab_size": 32000
 }
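A quick way to confirm these config values once the commit is live is to load them with transformers; the repo id below is an assumption based on the uploader and model name:

```python
from transformers import AutoConfig

# Repo id assumed from the uploader (CorticalStack) and the model name; adjust if it differs.
cfg = AutoConfig.from_pretrained("CorticalStack/pastiche-crown-clown-7B-dare")
print(cfg.architectures)    # e.g. ['MistralForCausalLM']
print(cfg.torch_dtype)      # bfloat16
print(cfg.sliding_window)   # 4096
print(cfg.vocab_size)       # 32000
```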
mergekit_config.yml CHANGED

@@ -1,21 +1,21 @@
 
 models:
-  - model:
+  - model: bardsai/jaskier-7b-dpo-v5.6
     # No parameters necessary for base model
   - model: mlabonne/AlphaMonarch-7B
     parameters:
       density: 0.53
-      weight: 0.
+      weight: 0.2
-  - model:
+  - model: mlabonne/NeuralMonarch-7B
     parameters:
       density: 0.53
-      weight: 0.
+      weight: 0.4
   - model: macadeliccc/MBX-7B-v3-DPO
     parameters:
       density: 0.53
-      weight: 0.
+      weight: 0.4
 merge_method: dare_ties
-base_model:
+base_model: bardsai/jaskier-7b-dpo-v5.6
 parameters:
   int8_mask: true
 dtype: bfloat16
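To reproduce the merge from this config, mergekit's documented CLI entry point is `mergekit-yaml`. A minimal sketch, assuming mergekit is installed (`pip install mergekit`) and the config is saved locally as `mergekit_config.yml`:

```python
import subprocess

# Runs mergekit's CLI against the config above and writes the merged model
# to a local output directory. Flag names follow mergekit's documented options.
subprocess.run(
    [
        "mergekit-yaml",
        "mergekit_config.yml",
        "./pastiche-crown-clown-7B-dare",
        "--copy-tokenizer",  # copy tokenizer files from the base model into the output
        "--lazy-unpickle",   # reduce peak memory while loading shards
    ],
    check=True,
)
```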
model-00001-of-00008.safetensors CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:fb8ec426f311795daecc23354815ed9a2f907cc13a1473561f38b5373f2b1a65
 size 1979773128
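Each shard is stored as a Git LFS pointer (version line, sha256 oid, byte size). A downloaded shard can be checked against its pointer with the standard library alone; the hash and size below are the ones from this commit's first shard:

```python
import hashlib
from pathlib import Path

def sha256_of(path: Path, chunk_size: int = 1 << 20) -> str:
    """Stream the file through SHA-256 so multi-GB shards never load fully into RAM."""
    h = hashlib.sha256()
    with path.open("rb") as f:
        while chunk := f.read(chunk_size):
            h.update(chunk)
    return h.hexdigest()

shard = Path("model-00001-of-00008.safetensors")
assert shard.stat().st_size == 1979773128  # size field from the LFS pointer
assert sha256_of(shard) == "fb8ec426f311795daecc23354815ed9a2f907cc13a1473561f38b5373f2b1a65"
```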
model-00002-of-00008.safetensors CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:96ab8e2afed31a8e39411680cdff4871574f29b00de41bdf53b3f755041c22ad
 size 1946235640
model-00003-of-00008.safetensors CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:6fdef48e9255da2899e1aaa404e4201ea82a86c29e75b7d367de8674f69db2f0
 size 1973490216
model-00004-of-00008.safetensors CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:f7609589e5d527d8bdb50ff81d3d674dc03e6fde8e9c099771cf47c9e24bccf9
 size 1979781464
model-00005-of-00008.safetensors CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:c94725e9ff1f3442c275c4ad893adf58d64d6830a481985abf05c402edb6e528
 size 1946243984
model-00006-of-00008.safetensors CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:00ee3e7b18f5e4f937d3d6000300e472e1eed348d0ae3c1f84bc55e88e0de7e0
 size 1923166040
model-00007-of-00008.safetensors CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:9990a2b916b49644ccd7d460b900cc0f30bc8a2fc5398b5420977a3e1b2513a9
 size 1946243984
model-00008-of-00008.safetensors CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:f9b521c676f57d16eee9357407cf51a1551a4fe7c5a8b926af94ffd0246351d5
 size 788563544
special_tokens_map.json CHANGED

@@ -1,9 +1,4 @@
 {
-  "additional_special_tokens": [
-    "<unk>",
-    "<s>",
-    "</s>"
-  ],
   "bos_token": {
     "content": "<s>",
     "lstrip": false,
@@ -19,7 +14,7 @@
     "single_word": false
   },
   "pad_token": {
-    "content": "
+    "content": "<unk>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
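This change drops the extra special tokens and reuses `<unk>` as the pad token, a common workaround since Mistral/Llama tokenizers ship without a dedicated pad token. A quick check (repo id assumed, as above):

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("CorticalStack/pastiche-crown-clown-7B-dare")
print(tok.special_tokens_map)
# Expected after this commit: pad_token == unk_token == "<unk>",
# and no additional_special_tokens entry.
assert tok.pad_token == tok.unk_token == "<unk>"
```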
tokenizer_config.json CHANGED

@@ -27,23 +27,17 @@
       "special": true
     }
   },
-  "additional_special_tokens": [
-    "<unk>",
-    "<s>",
-    "</s>"
-  ],
+  "additional_special_tokens": [],
   "bos_token": "<s>",
-  "chat_template": "{% for message in messages %}{{bos_token + message['role'] + '\n' + message['content'] + eos_token + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ bos_token + 'assistant\n' }}{% endif %}",
   "clean_up_tokenization_spaces": false,
   "eos_token": "</s>",
   "legacy": true,
-  "model_max_length":
+  "model_max_length": 32768,
-  "pad_token": "
+  "pad_token": "<unk>",
-  "padding_side": "
+  "padding_side": "right",
   "sp_model_kwargs": {},
   "spaces_between_special_tokens": false,
-  "split_special_tokens": false,
   "tokenizer_class": "LlamaTokenizer",
   "unk_token": "<unk>",
-  "use_default_system_prompt":
+  "use_default_system_prompt": false
 }
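With `pad_token` and `padding_side` now set, batched encoding works without extra setup. Note the `chat_template` entry was removed in this commit, so prompts need manual formatting (or a template assigned to `tokenizer.chat_template`) before `apply_chat_template` can be used. A minimal sketch, repo id assumed as above:

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("CorticalStack/pastiche-crown-clown-7B-dare")

# pad_token "<unk>" and padding_side "right" (both set in this commit) let
# sequences of different lengths be padded into one tensor:
batch = tok(["Hello", "A longer example sentence"], padding=True, return_tensors="pt")
print(batch["input_ids"].shape)
print(tok.model_max_length)  # 32768 per this commit
```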