Upload 15 files
- README.md +26 -0
- config.json +41 -0
- mergekit_config.yml +88 -0
- model.safetensors.index.json +0 -0
- output-00001-of-00007.safetensors +3 -0
- output-00002-of-00007.safetensors +3 -0
- output-00003-of-00007.safetensors +3 -0
- output-00004-of-00007.safetensors +3 -0
- output-00005-of-00007.safetensors +3 -0
- output-00006-of-00007.safetensors +3 -0
- output-00007-of-00007.safetensors +3 -0
- special_tokens_map.json +30 -0
- tokenizer.json +0 -0
- tokenizer.model +3 -0
- tokenizer_config.json +42 -0
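
For reference, a minimal sketch of pulling this upload locally with huggingface_hub; the repo id softwareweaver/Twilight-Miqu-146B is taken from `_name_or_path` in config.json below, and the target directory is a placeholder.

```python
# Sketch: download all uploaded files (EXL2 shards, tokenizer, configs) with huggingface_hub.
# Repo id comes from "_name_or_path" in config.json; local_dir is a placeholder path.
from huggingface_hub import snapshot_download

local_path = snapshot_download(
    repo_id="softwareweaver/Twilight-Miqu-146B",
    local_dir="./Twilight-Miqu-146B",
)
print(local_path)
```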
README.md
ADDED
@@ -0,0 +1,26 @@
---
base_model:
- jukofyork/Dark-Miqu-70B
- sophosympatheia/Midnight-Miqu-70B-v1.5
- jukofyork/Dawn-Miqu-70B
library_name: transformers
tags:
- mergekit
- merge
license: other
---
Twilight Miqu is a story-writing model composed from sophosympatheia/Midnight-Miqu-70B-v1.5, jukofyork/Dawn-Miqu-70B and jukofyork/Dark-Miqu-70B.

It is an experiment to see if large models are more coherent on story-writing tasks.
Twilight = Midnight + Dawn + Dark

Please see the Midnight-Miqu-70B-v1.5 model card for details and usage instructions:
https://huggingface.co/sophosympatheia/Midnight-Miqu-70B-v1.5

This model is based on Miqu, so it is capable of 32K context.

All Miqu-derived models, including this merge, are only suitable for personal use. Mistral has been cool about it so far, but you should be aware that by downloading this merge you are assuming whatever legal risk is inherent in acquiring and using a model based on leaked weights. This merge comes with no warranties or guarantees of any kind, but you probably already knew that.

This is a merge of pre-trained language models created using [mergekit](https://github.com/cg123/mergekit).

A big thank you to Mistral, sophosympatheia and jukofyork for the original models!
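
Since the output-*.safetensors shards in this upload are EXL2-quantized (see quantization_config in config.json below), a typical way to run them is ExLlamaV2. A minimal sketch based on exllamav2's basic-generator example; class and method names come from that library and may differ across versions, and the model directory, prompt, and sampling settings are placeholders.

```python
# Sketch following exllamav2's basic example: load the EXL2 shards and generate a
# short continuation. Verify names against your installed exllamav2 version.
from exllamav2 import ExLlamaV2, ExLlamaV2Cache, ExLlamaV2Config, ExLlamaV2Tokenizer
from exllamav2.generator import ExLlamaV2BaseGenerator, ExLlamaV2Sampler

config = ExLlamaV2Config()
config.model_dir = "./Twilight-Miqu-146B"  # placeholder: directory with the output-0000x-of-00007.safetensors shards
config.prepare()

model = ExLlamaV2(config)
cache = ExLlamaV2Cache(model, lazy=True)   # lazy cache so load_autosplit can spread layers across GPUs
model.load_autosplit(cache)

tokenizer = ExLlamaV2Tokenizer(config)
generator = ExLlamaV2BaseGenerator(model, cache, tokenizer)

settings = ExLlamaV2Sampler.Settings()
settings.temperature = 0.8
settings.top_p = 0.9

prompt = "Write the opening paragraph of a gothic mystery set in a lighthouse."
print(generator.generate_simple(prompt, settings, 200))  # 200 new tokens
```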
config.json
ADDED
@@ -0,0 +1,41 @@
{
    "_name_or_path": "softwareweaver/Twilight-Miqu-146B",
    "architectures": [
        "LlamaForCausalLM"
    ],
    "attention_bias": false,
    "attention_dropout": 0.0,
    "bos_token_id": 1,
    "eos_token_id": 2,
    "hidden_act": "silu",
    "hidden_size": 8192,
    "initializer_range": 0.02,
    "intermediate_size": 28672,
    "max_position_embeddings": 32764,
    "mlp_bias": false,
    "model_type": "llama",
    "num_attention_heads": 64,
    "num_hidden_layers": 170,
    "num_key_value_heads": 8,
    "pad_token_id": 0,
    "pretraining_tp": 1,
    "rms_norm_eps": 1e-05,
    "rope_scaling": null,
    "rope_theta": 1000000,
    "tie_word_embeddings": false,
    "torch_dtype": "float16",
    "transformers_version": "4.41.1",
    "use_cache": true,
    "vocab_size": 32000,
    "quantization_config": {
        "quant_method": "exl2",
        "version": "0.1.1",
        "bits": 3.25,
        "head_bits": 6,
        "calibration": {
            "rows": 100,
            "length": 2048,
            "dataset": "(default)"
        }
    }
}
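
A small sketch that reads the config above and prints the fields most relevant to this merge (context length, layer count after the passthrough merge, and EXL2 bit width); the local file path is assumed.

```python
# Sketch: inspect the key fields of the config.json shown above.
import json

with open("config.json") as f:
    cfg = json.load(f)

print(cfg["max_position_embeddings"])      # 32764 -> ~32K context
print(cfg["num_hidden_layers"])            # 170 layers after the passthrough merge
print(cfg["quantization_config"]["bits"])  # 3.25 bpw EXL2 quantization
```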
mergekit_config.yml
ADDED
@@ -0,0 +1,88 @@
const_tag: &MODEL1 sophosympatheia/Midnight-Miqu-70B-v1.5
const_tag: &MODEL3 jukofyork/Dawn-Miqu-70B
const_tag: &MODEL2 jukofyork/Dark-Miqu-70B

const_tag: &QK_ATTENUATION_FACTOR 0.8408964153 # sqrt(sqrt(1/2))
const_tag: &MLP_DOWN_SCALE_FACTOR 0.7071067812 # sqrt(1/2)

scale-filter-env: &scale_filter_env
  parameters:
    scale:
      - filter: q_proj
        value: *QK_ATTENUATION_FACTOR
      - filter: k_proj
        value: *QK_ATTENUATION_FACTOR
      - filter: down_proj
        value: *MLP_DOWN_SCALE_FACTOR
      - value: 1.0

slices:
- sources:
  - model: *MODEL1
    layer_range: [0, 10]
- sources:
  - model: *MODEL1
    layer_range: [10, 20]
  <<: *scale_filter_env
- sources:
  - model: *MODEL2
    layer_range: [10, 20]
  <<: *scale_filter_env
- sources:
  - model: *MODEL3
    layer_range: [10, 20]
  <<: *scale_filter_env
- sources:
  - model: *MODEL3
    layer_range: [20, 30]
  <<: *scale_filter_env
- sources:
  - model: *MODEL2
    layer_range: [20, 30]
  <<: *scale_filter_env
- sources:
  - model: *MODEL1
    layer_range: [30, 40]
  <<: *scale_filter_env
- sources:
  - model: *MODEL2
    layer_range: [30, 40]
  <<: *scale_filter_env
- sources:
  - model: *MODEL3
    layer_range: [40, 50]
  <<: *scale_filter_env
- sources:
  - model: *MODEL2
    layer_range: [40, 50]
  <<: *scale_filter_env
- sources:
  - model: *MODEL1
    layer_range: [50, 60]
  <<: *scale_filter_env
- sources:
  - model: *MODEL2
    layer_range: [50, 60]
  <<: *scale_filter_env
- sources:
  - model: *MODEL3
    layer_range: [50, 60]
  <<: *scale_filter_env
- sources:
  - model: *MODEL1
    layer_range: [60, 70]
  <<: *scale_filter_env
- sources:
  - model: *MODEL2
    layer_range: [60, 70]
  <<: *scale_filter_env
- sources:
  - model: *MODEL3
    layer_range: [60, 70]
  <<: *scale_filter_env
- sources:
  - model: *MODEL1
    layer_range: [70, 80]

merge_method: passthrough
dtype: float16
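
For reference, a short check of the two scale constants declared at the top of the config above: scaling both q_proj and k_proj by sqrt(sqrt(1/2)) scales the attention logits by sqrt(1/2), the same factor applied to down_proj.

```python
# Reproduce the two constants from mergekit_config.yml.
import math

qk_attenuation = math.sqrt(math.sqrt(1 / 2))  # 0.8408964152537145 -> sqrt(sqrt(1/2))
mlp_down_scale = math.sqrt(1 / 2)             # 0.7071067811865476 -> sqrt(1/2)

# Scaling both q_proj and k_proj by (1/2)**0.25 scales the attention logits
# (proportional to q . k) by (1/2)**0.5, matching the down_proj scale factor.
assert abs(qk_attenuation ** 2 - mlp_down_scale) < 1e-12
print(qk_attenuation, mlp_down_scale)
```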
model.safetensors.index.json
ADDED
The diff for this file is too large to render.
See raw diff
output-00001-of-00007.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:910f95d089734d051fd93fbda610f86125e0a14fa38b5cf863176a987a2d8d96
size 8569296702
output-00002-of-00007.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f0722fa03598d916896ee466bc9152f6372211395daa3fc5e142603e822d0c77
size 8534818552
output-00003-of-00007.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:826051cb9bc1a2521e56b59c6913be453e013b2771ac6cde726dd4a005b4d5da
size 8555759082
output-00004-of-00007.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1eb82878a8f062934f87eccc567418304d75ac10dfacc407c8c259b0087f97fc
size 8571517546
output-00005-of-00007.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:52db074d34e2165b47b602565c00bf3817d35c3935f4f16464bdb6c4188c2919
size 8547881050
output-00006-of-00007.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4f342830b0a61e6043246fd8238b3a6e5ba9819b5f757d167cd56d14b8e41d19
size 8547110166
output-00007-of-00007.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3506a9e81b15dd308b3ef954bdf14879d38076f2a34be3f8d7ccc8e395dce8dc
size 8500164638
special_tokens_map.json
ADDED
@@ -0,0 +1,30 @@
{
  "bos_token": {
    "content": "<s>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "eos_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "<unk>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "unk_token": {
    "content": "<unk>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  }
}
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
tokenizer.model
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
size 499723
tokenizer_config.json
ADDED
@@ -0,0 +1,42 @@
{
  "add_bos_token": true,
  "add_eos_token": false,
  "add_prefix_space": true,
  "added_tokens_decoder": {
    "0": {
      "content": "<unk>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "1": {
      "content": "<s>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "2": {
      "content": "</s>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "bos_token": "<s>",
  "clean_up_tokenization_spaces": false,
  "eos_token": "</s>",
  "legacy": false,
  "model_max_length": 1000000000000000019884624838656,
  "pad_token": "<unk>",
  "sp_model_kwargs": {},
  "spaces_between_special_tokens": false,
  "tokenizer_class": "LlamaTokenizer",
  "unk_token": "<unk>",
  "use_default_system_prompt": false
}
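
The tokenizer assets above are standard LlamaTokenizer files, so they load with transformers' AutoTokenizer; a small sketch, assuming the files are fetched from the hub under the repo id given in config.json.

```python
# Sketch: load the tokenizer shipped in this upload and confirm the special-token
# settings from tokenizer_config.json / special_tokens_map.json.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("softwareweaver/Twilight-Miqu-146B")

print(tok.bos_token, tok.eos_token, tok.unk_token, tok.pad_token)  # <s> </s> <unk> <unk>
ids = tok("Once upon a midnight dreary").input_ids
print(ids[0] == tok.bos_token_id)  # True: add_bos_token is true, add_eos_token is false
```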