issoh committed on
Commit
90483ec
·
verified ·
1 Parent(s): cd1f8b8

Upload 7 files

Browse files
README.md ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: []
3
+ library_name: transformers
4
+ tags:
5
+ - mergekit
6
+ - merge
7
+
8
+ ---
9
+ # final_merge
10
+
11
+ This is a merge of pre-trained language models created using [mergekit](https://github.com/cg123/mergekit).
12
+
13
+ ## Merge Details
14
+ ### Merge Method
15
+
16
+ This model was merged using the [task arithmetic](https://arxiv.org/abs/2212.04089) merge method, with `../evol_merge_storage/input_models/llm-jp-3-13b_2129051778` as the base model.
17
+
18
+ ### Models Merged
19
+
20
+ The following models were included in the merge:
21
+ * ../evol_merge_storage/input_models/llm-jp-3-13b-instruct_3918994333
22
+
23
+ ### Configuration
24
+
25
+ The following YAML configuration was used to produce this model:
26
+
27
+ ```yaml
28
+ base_model: ../evol_merge_storage/input_models/llm-jp-3-13b_2129051778
29
+ dtype: bfloat16
30
+ merge_method: task_arithmetic
31
+ parameters:
32
+ int8_mask: 1.0
33
+ normalize: 0.0
34
+ slices:
35
+ - sources:
36
+ - layer_range: [0, 2]
37
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b-instruct_3918994333
38
+ parameters:
39
+ weight: 1.2951159694588346
40
+ - layer_range: [0, 2]
41
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b_2129051778
42
+ - sources:
43
+ - layer_range: [2, 4]
44
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b-instruct_3918994333
45
+ parameters:
46
+ weight: 0.628217046418056
47
+ - layer_range: [2, 4]
48
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b_2129051778
49
+ - sources:
50
+ - layer_range: [4, 6]
51
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b-instruct_3918994333
52
+ parameters:
53
+ weight: 1.0422707547278394
54
+ - layer_range: [4, 6]
55
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b_2129051778
56
+ - sources:
57
+ - layer_range: [6, 8]
58
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b-instruct_3918994333
59
+ parameters:
60
+ weight: 1.0683380976074854
61
+ - layer_range: [6, 8]
62
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b_2129051778
63
+ - sources:
64
+ - layer_range: [8, 10]
65
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b-instruct_3918994333
66
+ parameters:
67
+ weight: 0.26203994833534333
68
+ - layer_range: [8, 10]
69
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b_2129051778
70
+ - sources:
71
+ - layer_range: [10, 12]
72
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b-instruct_3918994333
73
+ parameters:
74
+ weight: 1.1263717498902737
75
+ - layer_range: [10, 12]
76
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b_2129051778
77
+ - sources:
78
+ - layer_range: [12, 14]
79
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b-instruct_3918994333
80
+ parameters:
81
+ weight: 0.887708708428289
82
+ - layer_range: [12, 14]
83
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b_2129051778
84
+ - sources:
85
+ - layer_range: [14, 16]
86
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b-instruct_3918994333
87
+ parameters:
88
+ weight: 1.2028184670045419
89
+ - layer_range: [14, 16]
90
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b_2129051778
91
+ - sources:
92
+ - layer_range: [16, 18]
93
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b-instruct_3918994333
94
+ parameters:
95
+ weight: 1.5253943623966824
96
+ - layer_range: [16, 18]
97
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b_2129051778
98
+ - sources:
99
+ - layer_range: [18, 20]
100
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b-instruct_3918994333
101
+ parameters:
102
+ weight: 0.9231084138587686
103
+ - layer_range: [18, 20]
104
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b_2129051778
105
+ - sources:
106
+ - layer_range: [20, 22]
107
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b-instruct_3918994333
108
+ parameters:
109
+ weight: 1.0382986550795958
110
+ - layer_range: [20, 22]
111
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b_2129051778
112
+ - sources:
113
+ - layer_range: [22, 24]
114
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b-instruct_3918994333
115
+ parameters:
116
+ weight: 1.0058822243315682
117
+ - layer_range: [22, 24]
118
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b_2129051778
119
+ - sources:
120
+ - layer_range: [24, 26]
121
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b-instruct_3918994333
122
+ parameters:
123
+ weight: 1.0496562280234227
124
+ - layer_range: [24, 26]
125
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b_2129051778
126
+ - sources:
127
+ - layer_range: [26, 28]
128
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b-instruct_3918994333
129
+ parameters:
130
+ weight: 1.4546744316577644
131
+ - layer_range: [26, 28]
132
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b_2129051778
133
+ - sources:
134
+ - layer_range: [28, 30]
135
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b-instruct_3918994333
136
+ parameters:
137
+ weight: 0.7126849392596979
138
+ - layer_range: [28, 30]
139
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b_2129051778
140
+ - sources:
141
+ - layer_range: [30, 32]
142
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b-instruct_3918994333
143
+ parameters:
144
+ weight: 0.31595188025306903
145
+ - layer_range: [30, 32]
146
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b_2129051778
147
+ - sources:
148
+ - layer_range: [32, 34]
149
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b-instruct_3918994333
150
+ parameters:
151
+ weight: 1.2021087899996585
152
+ - layer_range: [32, 34]
153
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b_2129051778
154
+ - sources:
155
+ - layer_range: [34, 36]
156
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b-instruct_3918994333
157
+ parameters:
158
+ weight: 0.9651661068819831
159
+ - layer_range: [34, 36]
160
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b_2129051778
161
+ - sources:
162
+ - layer_range: [36, 38]
163
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b-instruct_3918994333
164
+ parameters:
165
+ weight: 0.8787595708487486
166
+ - layer_range: [36, 38]
167
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b_2129051778
168
+ - sources:
169
+ - layer_range: [38, 40]
170
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b-instruct_3918994333
171
+ parameters:
172
+ weight: 0.3036739676118799
173
+ - layer_range: [38, 40]
174
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b_2129051778
175
+ ```
config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "../evol_merge_storage/input_models/llm-jp-3-13b_2129051778",
3
+ "architectures": [
4
+ "LlamaForCausalLM"
5
+ ],
6
+ "attention_bias": false,
7
+ "attention_dropout": 0.0,
8
+ "bos_token_id": 1,
9
+ "eos_token_id": 2,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 5120,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 13824,
15
+ "max_position_embeddings": 4096,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "num_attention_heads": 40,
19
+ "num_hidden_layers": 40,
20
+ "num_key_value_heads": 40,
21
+ "pretraining_tp": 1,
22
+ "rms_norm_eps": 1e-05,
23
+ "rope_scaling": null,
24
+ "rope_theta": 10000,
25
+ "tie_word_embeddings": false,
26
+ "torch_dtype": "bfloat16",
27
+ "transformers_version": "4.46.2",
28
+ "use_cache": true,
29
+ "vocab_size": 99584
30
+ }
mergekit_config.yml ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: ../evol_merge_storage/input_models/llm-jp-3-13b_2129051778
2
+ dtype: bfloat16
3
+ merge_method: task_arithmetic
4
+ parameters:
5
+ int8_mask: 1.0
6
+ normalize: 0.0
7
+ slices:
8
+ - sources:
9
+ - layer_range: [0, 2]
10
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b-instruct_3918994333
11
+ parameters:
12
+ weight: 1.2951159694588346
13
+ - layer_range: [0, 2]
14
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b_2129051778
15
+ - sources:
16
+ - layer_range: [2, 4]
17
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b-instruct_3918994333
18
+ parameters:
19
+ weight: 0.628217046418056
20
+ - layer_range: [2, 4]
21
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b_2129051778
22
+ - sources:
23
+ - layer_range: [4, 6]
24
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b-instruct_3918994333
25
+ parameters:
26
+ weight: 1.0422707547278394
27
+ - layer_range: [4, 6]
28
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b_2129051778
29
+ - sources:
30
+ - layer_range: [6, 8]
31
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b-instruct_3918994333
32
+ parameters:
33
+ weight: 1.0683380976074854
34
+ - layer_range: [6, 8]
35
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b_2129051778
36
+ - sources:
37
+ - layer_range: [8, 10]
38
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b-instruct_3918994333
39
+ parameters:
40
+ weight: 0.26203994833534333
41
+ - layer_range: [8, 10]
42
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b_2129051778
43
+ - sources:
44
+ - layer_range: [10, 12]
45
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b-instruct_3918994333
46
+ parameters:
47
+ weight: 1.1263717498902737
48
+ - layer_range: [10, 12]
49
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b_2129051778
50
+ - sources:
51
+ - layer_range: [12, 14]
52
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b-instruct_3918994333
53
+ parameters:
54
+ weight: 0.887708708428289
55
+ - layer_range: [12, 14]
56
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b_2129051778
57
+ - sources:
58
+ - layer_range: [14, 16]
59
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b-instruct_3918994333
60
+ parameters:
61
+ weight: 1.2028184670045419
62
+ - layer_range: [14, 16]
63
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b_2129051778
64
+ - sources:
65
+ - layer_range: [16, 18]
66
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b-instruct_3918994333
67
+ parameters:
68
+ weight: 1.5253943623966824
69
+ - layer_range: [16, 18]
70
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b_2129051778
71
+ - sources:
72
+ - layer_range: [18, 20]
73
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b-instruct_3918994333
74
+ parameters:
75
+ weight: 0.9231084138587686
76
+ - layer_range: [18, 20]
77
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b_2129051778
78
+ - sources:
79
+ - layer_range: [20, 22]
80
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b-instruct_3918994333
81
+ parameters:
82
+ weight: 1.0382986550795958
83
+ - layer_range: [20, 22]
84
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b_2129051778
85
+ - sources:
86
+ - layer_range: [22, 24]
87
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b-instruct_3918994333
88
+ parameters:
89
+ weight: 1.0058822243315682
90
+ - layer_range: [22, 24]
91
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b_2129051778
92
+ - sources:
93
+ - layer_range: [24, 26]
94
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b-instruct_3918994333
95
+ parameters:
96
+ weight: 1.0496562280234227
97
+ - layer_range: [24, 26]
98
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b_2129051778
99
+ - sources:
100
+ - layer_range: [26, 28]
101
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b-instruct_3918994333
102
+ parameters:
103
+ weight: 1.4546744316577644
104
+ - layer_range: [26, 28]
105
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b_2129051778
106
+ - sources:
107
+ - layer_range: [28, 30]
108
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b-instruct_3918994333
109
+ parameters:
110
+ weight: 0.7126849392596979
111
+ - layer_range: [28, 30]
112
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b_2129051778
113
+ - sources:
114
+ - layer_range: [30, 32]
115
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b-instruct_3918994333
116
+ parameters:
117
+ weight: 0.31595188025306903
118
+ - layer_range: [30, 32]
119
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b_2129051778
120
+ - sources:
121
+ - layer_range: [32, 34]
122
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b-instruct_3918994333
123
+ parameters:
124
+ weight: 1.2021087899996585
125
+ - layer_range: [32, 34]
126
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b_2129051778
127
+ - sources:
128
+ - layer_range: [34, 36]
129
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b-instruct_3918994333
130
+ parameters:
131
+ weight: 0.9651661068819831
132
+ - layer_range: [34, 36]
133
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b_2129051778
134
+ - sources:
135
+ - layer_range: [36, 38]
136
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b-instruct_3918994333
137
+ parameters:
138
+ weight: 0.8787595708487486
139
+ - layer_range: [36, 38]
140
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b_2129051778
141
+ - sources:
142
+ - layer_range: [38, 40]
143
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b-instruct_3918994333
144
+ parameters:
145
+ weight: 0.3036739676118799
146
+ - layer_range: [38, 40]
147
+ model: ../evol_merge_storage/input_models/llm-jp-3-13b_2129051778
model.safetensors.index.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"metadata": {"mergekit_version": "0.0.5.2", "total_size": 27415848960}, "weight_map": {"lm_head.weight": "model-00001-of-00006.safetensors", "model.embed_tokens.weight": "model-00001-of-00006.safetensors", "model.layers.0.input_layernorm.weight": "model-00001-of-00006.safetensors", "model.layers.0.mlp.down_proj.weight": "model-00001-of-00006.safetensors", "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00006.safetensors", "model.layers.0.mlp.up_proj.weight": "model-00001-of-00006.safetensors", "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", "model.layers.1.input_layernorm.weight": "model-00001-of-00006.safetensors", "model.layers.1.mlp.down_proj.weight": "model-00001-of-00006.safetensors", "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00006.safetensors", "model.layers.1.mlp.up_proj.weight": "model-00001-of-00006.safetensors", "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", "model.layers.10.input_layernorm.weight": "model-00001-of-00006.safetensors", "model.layers.10.mlp.down_proj.weight": "model-00001-of-00006.safetensors", "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00006.safetensors", "model.layers.10.mlp.up_proj.weight": "model-00001-of-00006.safetensors", "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", 
"model.layers.10.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", "model.layers.11.input_layernorm.weight": "model-00001-of-00006.safetensors", "model.layers.11.mlp.down_proj.weight": "model-00001-of-00006.safetensors", "model.layers.11.mlp.gate_proj.weight": "model-00001-of-00006.safetensors", "model.layers.11.mlp.up_proj.weight": "model-00001-of-00006.safetensors", "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", "model.layers.12.input_layernorm.weight": "model-00001-of-00006.safetensors", "model.layers.12.mlp.down_proj.weight": "model-00001-of-00006.safetensors", "model.layers.12.mlp.gate_proj.weight": "model-00001-of-00006.safetensors", "model.layers.12.mlp.up_proj.weight": "model-00002-of-00006.safetensors", "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00006.safetensors", "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00006.safetensors", "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00006.safetensors", "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00006.safetensors", "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00006.safetensors", "model.layers.13.input_layernorm.weight": "model-00002-of-00006.safetensors", "model.layers.13.mlp.down_proj.weight": "model-00002-of-00006.safetensors", "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00006.safetensors", "model.layers.13.mlp.up_proj.weight": 
"model-00002-of-00006.safetensors", "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00006.safetensors", "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00006.safetensors", "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00006.safetensors", "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00006.safetensors", "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00006.safetensors", "model.layers.14.input_layernorm.weight": "model-00002-of-00006.safetensors", "model.layers.14.mlp.down_proj.weight": "model-00002-of-00006.safetensors", "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00006.safetensors", "model.layers.14.mlp.up_proj.weight": "model-00002-of-00006.safetensors", "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00006.safetensors", "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00006.safetensors", "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00006.safetensors", "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00006.safetensors", "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00006.safetensors", "model.layers.15.input_layernorm.weight": "model-00002-of-00006.safetensors", "model.layers.15.mlp.down_proj.weight": "model-00002-of-00006.safetensors", "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00006.safetensors", "model.layers.15.mlp.up_proj.weight": "model-00002-of-00006.safetensors", "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00006.safetensors", "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00006.safetensors", "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00006.safetensors", "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00006.safetensors", "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00006.safetensors", "model.layers.16.input_layernorm.weight": "model-00002-of-00006.safetensors", "model.layers.16.mlp.down_proj.weight": 
"model-00002-of-00006.safetensors", "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00006.safetensors", "model.layers.16.mlp.up_proj.weight": "model-00002-of-00006.safetensors", "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00006.safetensors", "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00006.safetensors", "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00006.safetensors", "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00006.safetensors", "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00006.safetensors", "model.layers.17.input_layernorm.weight": "model-00002-of-00006.safetensors", "model.layers.17.mlp.down_proj.weight": "model-00002-of-00006.safetensors", "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00006.safetensors", "model.layers.17.mlp.up_proj.weight": "model-00002-of-00006.safetensors", "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00006.safetensors", "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00006.safetensors", "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00006.safetensors", "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00006.safetensors", "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00006.safetensors", "model.layers.18.input_layernorm.weight": "model-00002-of-00006.safetensors", "model.layers.18.mlp.down_proj.weight": "model-00002-of-00006.safetensors", "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00006.safetensors", "model.layers.18.mlp.up_proj.weight": "model-00002-of-00006.safetensors", "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00006.safetensors", "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00006.safetensors", "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00006.safetensors", "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00006.safetensors", "model.layers.18.self_attn.v_proj.weight": 
"model-00002-of-00006.safetensors", "model.layers.19.input_layernorm.weight": "model-00002-of-00006.safetensors", "model.layers.19.mlp.down_proj.weight": "model-00002-of-00006.safetensors", "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00006.safetensors", "model.layers.19.mlp.up_proj.weight": "model-00002-of-00006.safetensors", "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00006.safetensors", "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00006.safetensors", "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00006.safetensors", "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00006.safetensors", "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00006.safetensors", "model.layers.2.input_layernorm.weight": "model-00002-of-00006.safetensors", "model.layers.2.mlp.down_proj.weight": "model-00002-of-00006.safetensors", "model.layers.2.mlp.gate_proj.weight": "model-00003-of-00006.safetensors", "model.layers.2.mlp.up_proj.weight": "model-00003-of-00006.safetensors", "model.layers.2.post_attention_layernorm.weight": "model-00003-of-00006.safetensors", "model.layers.2.self_attn.k_proj.weight": "model-00003-of-00006.safetensors", "model.layers.2.self_attn.o_proj.weight": "model-00003-of-00006.safetensors", "model.layers.2.self_attn.q_proj.weight": "model-00003-of-00006.safetensors", "model.layers.2.self_attn.v_proj.weight": "model-00003-of-00006.safetensors", "model.layers.20.input_layernorm.weight": "model-00003-of-00006.safetensors", "model.layers.20.mlp.down_proj.weight": "model-00003-of-00006.safetensors", "model.layers.20.mlp.gate_proj.weight": "model-00003-of-00006.safetensors", "model.layers.20.mlp.up_proj.weight": "model-00003-of-00006.safetensors", "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00006.safetensors", "model.layers.20.self_attn.k_proj.weight": "model-00003-of-00006.safetensors", "model.layers.20.self_attn.o_proj.weight": "model-00003-of-00006.safetensors", 
"model.layers.20.self_attn.q_proj.weight": "model-00003-of-00006.safetensors", "model.layers.20.self_attn.v_proj.weight": "model-00003-of-00006.safetensors", "model.layers.21.input_layernorm.weight": "model-00003-of-00006.safetensors", "model.layers.21.mlp.down_proj.weight": "model-00003-of-00006.safetensors", "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00006.safetensors", "model.layers.21.mlp.up_proj.weight": "model-00003-of-00006.safetensors", "model.layers.21.post_attention_layernorm.weight": "model-00003-of-00006.safetensors", "model.layers.21.self_attn.k_proj.weight": "model-00003-of-00006.safetensors", "model.layers.21.self_attn.o_proj.weight": "model-00003-of-00006.safetensors", "model.layers.21.self_attn.q_proj.weight": "model-00003-of-00006.safetensors", "model.layers.21.self_attn.v_proj.weight": "model-00003-of-00006.safetensors", "model.layers.22.input_layernorm.weight": "model-00003-of-00006.safetensors", "model.layers.22.mlp.down_proj.weight": "model-00003-of-00006.safetensors", "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00006.safetensors", "model.layers.22.mlp.up_proj.weight": "model-00003-of-00006.safetensors", "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00006.safetensors", "model.layers.22.self_attn.k_proj.weight": "model-00003-of-00006.safetensors", "model.layers.22.self_attn.o_proj.weight": "model-00003-of-00006.safetensors", "model.layers.22.self_attn.q_proj.weight": "model-00003-of-00006.safetensors", "model.layers.22.self_attn.v_proj.weight": "model-00003-of-00006.safetensors", "model.layers.23.input_layernorm.weight": "model-00003-of-00006.safetensors", "model.layers.23.mlp.down_proj.weight": "model-00003-of-00006.safetensors", "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00006.safetensors", "model.layers.23.mlp.up_proj.weight": "model-00003-of-00006.safetensors", "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00006.safetensors", 
"model.layers.23.self_attn.k_proj.weight": "model-00003-of-00006.safetensors", "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00006.safetensors", "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00006.safetensors", "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00006.safetensors", "model.layers.24.input_layernorm.weight": "model-00003-of-00006.safetensors", "model.layers.24.mlp.down_proj.weight": "model-00003-of-00006.safetensors", "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00006.safetensors", "model.layers.24.mlp.up_proj.weight": "model-00003-of-00006.safetensors", "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00006.safetensors", "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00006.safetensors", "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00006.safetensors", "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00006.safetensors", "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00006.safetensors", "model.layers.25.input_layernorm.weight": "model-00003-of-00006.safetensors", "model.layers.25.mlp.down_proj.weight": "model-00003-of-00006.safetensors", "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00006.safetensors", "model.layers.25.mlp.up_proj.weight": "model-00003-of-00006.safetensors", "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00006.safetensors", "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00006.safetensors", "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00006.safetensors", "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00006.safetensors", "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00006.safetensors", "model.layers.26.input_layernorm.weight": "model-00003-of-00006.safetensors", "model.layers.26.mlp.down_proj.weight": "model-00003-of-00006.safetensors", "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00006.safetensors", "model.layers.26.mlp.up_proj.weight": 
"model-00003-of-00006.safetensors", "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00006.safetensors", "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00006.safetensors", "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00006.safetensors", "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00006.safetensors", "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00006.safetensors", "model.layers.27.input_layernorm.weight": "model-00003-of-00006.safetensors", "model.layers.27.mlp.down_proj.weight": "model-00004-of-00006.safetensors", "model.layers.27.mlp.gate_proj.weight": "model-00004-of-00006.safetensors", "model.layers.27.mlp.up_proj.weight": "model-00004-of-00006.safetensors", "model.layers.27.post_attention_layernorm.weight": "model-00004-of-00006.safetensors", "model.layers.27.self_attn.k_proj.weight": "model-00004-of-00006.safetensors", "model.layers.27.self_attn.o_proj.weight": "model-00004-of-00006.safetensors", "model.layers.27.self_attn.q_proj.weight": "model-00004-of-00006.safetensors", "model.layers.27.self_attn.v_proj.weight": "model-00004-of-00006.safetensors", "model.layers.28.input_layernorm.weight": "model-00004-of-00006.safetensors", "model.layers.28.mlp.down_proj.weight": "model-00004-of-00006.safetensors", "model.layers.28.mlp.gate_proj.weight": "model-00004-of-00006.safetensors", "model.layers.28.mlp.up_proj.weight": "model-00004-of-00006.safetensors", "model.layers.28.post_attention_layernorm.weight": "model-00004-of-00006.safetensors", "model.layers.28.self_attn.k_proj.weight": "model-00004-of-00006.safetensors", "model.layers.28.self_attn.o_proj.weight": "model-00004-of-00006.safetensors", "model.layers.28.self_attn.q_proj.weight": "model-00004-of-00006.safetensors", "model.layers.28.self_attn.v_proj.weight": "model-00004-of-00006.safetensors", "model.layers.29.input_layernorm.weight": "model-00004-of-00006.safetensors", "model.layers.29.mlp.down_proj.weight": 
"model-00004-of-00006.safetensors", "model.layers.29.mlp.gate_proj.weight": "model-00004-of-00006.safetensors", "model.layers.29.mlp.up_proj.weight": "model-00004-of-00006.safetensors", "model.layers.29.post_attention_layernorm.weight": "model-00004-of-00006.safetensors", "model.layers.29.self_attn.k_proj.weight": "model-00004-of-00006.safetensors", "model.layers.29.self_attn.o_proj.weight": "model-00004-of-00006.safetensors", "model.layers.29.self_attn.q_proj.weight": "model-00004-of-00006.safetensors", "model.layers.29.self_attn.v_proj.weight": "model-00004-of-00006.safetensors", "model.layers.3.input_layernorm.weight": "model-00004-of-00006.safetensors", "model.layers.3.mlp.down_proj.weight": "model-00004-of-00006.safetensors", "model.layers.3.mlp.gate_proj.weight": "model-00004-of-00006.safetensors", "model.layers.3.mlp.up_proj.weight": "model-00004-of-00006.safetensors", "model.layers.3.post_attention_layernorm.weight": "model-00004-of-00006.safetensors", "model.layers.3.self_attn.k_proj.weight": "model-00004-of-00006.safetensors", "model.layers.3.self_attn.o_proj.weight": "model-00004-of-00006.safetensors", "model.layers.3.self_attn.q_proj.weight": "model-00004-of-00006.safetensors", "model.layers.3.self_attn.v_proj.weight": "model-00004-of-00006.safetensors", "model.layers.30.input_layernorm.weight": "model-00004-of-00006.safetensors", "model.layers.30.mlp.down_proj.weight": "model-00004-of-00006.safetensors", "model.layers.30.mlp.gate_proj.weight": "model-00004-of-00006.safetensors", "model.layers.30.mlp.up_proj.weight": "model-00004-of-00006.safetensors", "model.layers.30.post_attention_layernorm.weight": "model-00004-of-00006.safetensors", "model.layers.30.self_attn.k_proj.weight": "model-00004-of-00006.safetensors", "model.layers.30.self_attn.o_proj.weight": "model-00004-of-00006.safetensors", "model.layers.30.self_attn.q_proj.weight": "model-00004-of-00006.safetensors", "model.layers.30.self_attn.v_proj.weight": "model-00004-of-00006.safetensors", 
"model.layers.31.input_layernorm.weight": "model-00004-of-00006.safetensors", "model.layers.31.mlp.down_proj.weight": "model-00004-of-00006.safetensors", "model.layers.31.mlp.gate_proj.weight": "model-00004-of-00006.safetensors", "model.layers.31.mlp.up_proj.weight": "model-00004-of-00006.safetensors", "model.layers.31.post_attention_layernorm.weight": "model-00004-of-00006.safetensors", "model.layers.31.self_attn.k_proj.weight": "model-00004-of-00006.safetensors", "model.layers.31.self_attn.o_proj.weight": "model-00004-of-00006.safetensors", "model.layers.31.self_attn.q_proj.weight": "model-00004-of-00006.safetensors", "model.layers.31.self_attn.v_proj.weight": "model-00004-of-00006.safetensors", "model.layers.32.input_layernorm.weight": "model-00004-of-00006.safetensors", "model.layers.32.mlp.down_proj.weight": "model-00004-of-00006.safetensors", "model.layers.32.mlp.gate_proj.weight": "model-00004-of-00006.safetensors", "model.layers.32.mlp.up_proj.weight": "model-00004-of-00006.safetensors", "model.layers.32.post_attention_layernorm.weight": "model-00004-of-00006.safetensors", "model.layers.32.self_attn.k_proj.weight": "model-00004-of-00006.safetensors", "model.layers.32.self_attn.o_proj.weight": "model-00004-of-00006.safetensors", "model.layers.32.self_attn.q_proj.weight": "model-00004-of-00006.safetensors", "model.layers.32.self_attn.v_proj.weight": "model-00004-of-00006.safetensors", "model.layers.33.input_layernorm.weight": "model-00004-of-00006.safetensors", "model.layers.33.mlp.down_proj.weight": "model-00004-of-00006.safetensors", "model.layers.33.mlp.gate_proj.weight": "model-00004-of-00006.safetensors", "model.layers.33.mlp.up_proj.weight": "model-00004-of-00006.safetensors", "model.layers.33.post_attention_layernorm.weight": "model-00004-of-00006.safetensors", "model.layers.33.self_attn.k_proj.weight": "model-00004-of-00006.safetensors", "model.layers.33.self_attn.o_proj.weight": "model-00004-of-00006.safetensors", 
"model.layers.33.self_attn.q_proj.weight": "model-00005-of-00006.safetensors", "model.layers.33.self_attn.v_proj.weight": "model-00005-of-00006.safetensors", "model.layers.34.input_layernorm.weight": "model-00005-of-00006.safetensors", "model.layers.34.mlp.down_proj.weight": "model-00005-of-00006.safetensors", "model.layers.34.mlp.gate_proj.weight": "model-00005-of-00006.safetensors", "model.layers.34.mlp.up_proj.weight": "model-00005-of-00006.safetensors", "model.layers.34.post_attention_layernorm.weight": "model-00005-of-00006.safetensors", "model.layers.34.self_attn.k_proj.weight": "model-00005-of-00006.safetensors", "model.layers.34.self_attn.o_proj.weight": "model-00005-of-00006.safetensors", "model.layers.34.self_attn.q_proj.weight": "model-00005-of-00006.safetensors", "model.layers.34.self_attn.v_proj.weight": "model-00005-of-00006.safetensors", "model.layers.35.input_layernorm.weight": "model-00005-of-00006.safetensors", "model.layers.35.mlp.down_proj.weight": "model-00005-of-00006.safetensors", "model.layers.35.mlp.gate_proj.weight": "model-00005-of-00006.safetensors", "model.layers.35.mlp.up_proj.weight": "model-00005-of-00006.safetensors", "model.layers.35.post_attention_layernorm.weight": "model-00005-of-00006.safetensors", "model.layers.35.self_attn.k_proj.weight": "model-00005-of-00006.safetensors", "model.layers.35.self_attn.o_proj.weight": "model-00005-of-00006.safetensors", "model.layers.35.self_attn.q_proj.weight": "model-00005-of-00006.safetensors", "model.layers.35.self_attn.v_proj.weight": "model-00005-of-00006.safetensors", "model.layers.36.input_layernorm.weight": "model-00005-of-00006.safetensors", "model.layers.36.mlp.down_proj.weight": "model-00005-of-00006.safetensors", "model.layers.36.mlp.gate_proj.weight": "model-00005-of-00006.safetensors", "model.layers.36.mlp.up_proj.weight": "model-00005-of-00006.safetensors", "model.layers.36.post_attention_layernorm.weight": "model-00005-of-00006.safetensors", 
"model.layers.36.self_attn.k_proj.weight": "model-00005-of-00006.safetensors", "model.layers.36.self_attn.o_proj.weight": "model-00005-of-00006.safetensors", "model.layers.36.self_attn.q_proj.weight": "model-00005-of-00006.safetensors", "model.layers.36.self_attn.v_proj.weight": "model-00005-of-00006.safetensors", "model.layers.37.input_layernorm.weight": "model-00005-of-00006.safetensors", "model.layers.37.mlp.down_proj.weight": "model-00005-of-00006.safetensors", "model.layers.37.mlp.gate_proj.weight": "model-00005-of-00006.safetensors", "model.layers.37.mlp.up_proj.weight": "model-00005-of-00006.safetensors", "model.layers.37.post_attention_layernorm.weight": "model-00005-of-00006.safetensors", "model.layers.37.self_attn.k_proj.weight": "model-00005-of-00006.safetensors", "model.layers.37.self_attn.o_proj.weight": "model-00005-of-00006.safetensors", "model.layers.37.self_attn.q_proj.weight": "model-00005-of-00006.safetensors", "model.layers.37.self_attn.v_proj.weight": "model-00005-of-00006.safetensors", "model.layers.38.input_layernorm.weight": "model-00005-of-00006.safetensors", "model.layers.38.mlp.down_proj.weight": "model-00005-of-00006.safetensors", "model.layers.38.mlp.gate_proj.weight": "model-00005-of-00006.safetensors", "model.layers.38.mlp.up_proj.weight": "model-00005-of-00006.safetensors", "model.layers.38.post_attention_layernorm.weight": "model-00005-of-00006.safetensors", "model.layers.38.self_attn.k_proj.weight": "model-00005-of-00006.safetensors", "model.layers.38.self_attn.o_proj.weight": "model-00005-of-00006.safetensors", "model.layers.38.self_attn.q_proj.weight": "model-00005-of-00006.safetensors", "model.layers.38.self_attn.v_proj.weight": "model-00005-of-00006.safetensors", "model.layers.39.input_layernorm.weight": "model-00005-of-00006.safetensors", "model.layers.39.mlp.down_proj.weight": "model-00005-of-00006.safetensors", "model.layers.39.mlp.gate_proj.weight": "model-00005-of-00006.safetensors", "model.layers.39.mlp.up_proj.weight": 
"model-00005-of-00006.safetensors", "model.layers.39.post_attention_layernorm.weight": "model-00005-of-00006.safetensors", "model.layers.39.self_attn.k_proj.weight": "model-00005-of-00006.safetensors", "model.layers.39.self_attn.o_proj.weight": "model-00005-of-00006.safetensors", "model.layers.39.self_attn.q_proj.weight": "model-00005-of-00006.safetensors", "model.layers.39.self_attn.v_proj.weight": "model-00005-of-00006.safetensors", "model.layers.4.input_layernorm.weight": "model-00005-of-00006.safetensors", "model.layers.4.mlp.down_proj.weight": "model-00005-of-00006.safetensors", "model.layers.4.mlp.gate_proj.weight": "model-00005-of-00006.safetensors", "model.layers.4.mlp.up_proj.weight": "model-00005-of-00006.safetensors", "model.layers.4.post_attention_layernorm.weight": "model-00005-of-00006.safetensors", "model.layers.4.self_attn.k_proj.weight": "model-00005-of-00006.safetensors", "model.layers.4.self_attn.o_proj.weight": "model-00005-of-00006.safetensors", "model.layers.4.self_attn.q_proj.weight": "model-00005-of-00006.safetensors", "model.layers.4.self_attn.v_proj.weight": "model-00005-of-00006.safetensors", "model.layers.5.input_layernorm.weight": "model-00005-of-00006.safetensors", "model.layers.5.mlp.down_proj.weight": "model-00005-of-00006.safetensors", "model.layers.5.mlp.gate_proj.weight": "model-00005-of-00006.safetensors", "model.layers.5.mlp.up_proj.weight": "model-00005-of-00006.safetensors", "model.layers.5.post_attention_layernorm.weight": "model-00005-of-00006.safetensors", "model.layers.5.self_attn.k_proj.weight": "model-00006-of-00006.safetensors", "model.layers.5.self_attn.o_proj.weight": "model-00006-of-00006.safetensors", "model.layers.5.self_attn.q_proj.weight": "model-00006-of-00006.safetensors", "model.layers.5.self_attn.v_proj.weight": "model-00006-of-00006.safetensors", "model.layers.6.input_layernorm.weight": "model-00006-of-00006.safetensors", "model.layers.6.mlp.down_proj.weight": "model-00006-of-00006.safetensors", 
"model.layers.6.mlp.gate_proj.weight": "model-00006-of-00006.safetensors", "model.layers.6.mlp.up_proj.weight": "model-00006-of-00006.safetensors", "model.layers.6.post_attention_layernorm.weight": "model-00006-of-00006.safetensors", "model.layers.6.self_attn.k_proj.weight": "model-00006-of-00006.safetensors", "model.layers.6.self_attn.o_proj.weight": "model-00006-of-00006.safetensors", "model.layers.6.self_attn.q_proj.weight": "model-00006-of-00006.safetensors", "model.layers.6.self_attn.v_proj.weight": "model-00006-of-00006.safetensors", "model.layers.7.input_layernorm.weight": "model-00006-of-00006.safetensors", "model.layers.7.mlp.down_proj.weight": "model-00006-of-00006.safetensors", "model.layers.7.mlp.gate_proj.weight": "model-00006-of-00006.safetensors", "model.layers.7.mlp.up_proj.weight": "model-00006-of-00006.safetensors", "model.layers.7.post_attention_layernorm.weight": "model-00006-of-00006.safetensors", "model.layers.7.self_attn.k_proj.weight": "model-00006-of-00006.safetensors", "model.layers.7.self_attn.o_proj.weight": "model-00006-of-00006.safetensors", "model.layers.7.self_attn.q_proj.weight": "model-00006-of-00006.safetensors", "model.layers.7.self_attn.v_proj.weight": "model-00006-of-00006.safetensors", "model.layers.8.input_layernorm.weight": "model-00006-of-00006.safetensors", "model.layers.8.mlp.down_proj.weight": "model-00006-of-00006.safetensors", "model.layers.8.mlp.gate_proj.weight": "model-00006-of-00006.safetensors", "model.layers.8.mlp.up_proj.weight": "model-00006-of-00006.safetensors", "model.layers.8.post_attention_layernorm.weight": "model-00006-of-00006.safetensors", "model.layers.8.self_attn.k_proj.weight": "model-00006-of-00006.safetensors", "model.layers.8.self_attn.o_proj.weight": "model-00006-of-00006.safetensors", "model.layers.8.self_attn.q_proj.weight": "model-00006-of-00006.safetensors", "model.layers.8.self_attn.v_proj.weight": "model-00006-of-00006.safetensors", "model.layers.9.input_layernorm.weight": 
"model-00006-of-00006.safetensors", "model.layers.9.mlp.down_proj.weight": "model-00006-of-00006.safetensors", "model.layers.9.mlp.gate_proj.weight": "model-00006-of-00006.safetensors", "model.layers.9.mlp.up_proj.weight": "model-00006-of-00006.safetensors", "model.layers.9.post_attention_layernorm.weight": "model-00006-of-00006.safetensors", "model.layers.9.self_attn.k_proj.weight": "model-00006-of-00006.safetensors", "model.layers.9.self_attn.o_proj.weight": "model-00006-of-00006.safetensors", "model.layers.9.self_attn.q_proj.weight": "model-00006-of-00006.safetensors", "model.layers.9.self_attn.v_proj.weight": "model-00006-of-00006.safetensors", "model.norm.weight": "model-00006-of-00006.safetensors"}}
special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<CLS|LLM-jp>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<MASK|LLM-jp>",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<PAD|LLM-jp>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "<SEP|LLM-jp>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "<unk>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "added_tokens_decoder": {
5
+ "0": {
6
+ "content": "<unk>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "1": {
14
+ "content": "<s>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "2": {
22
+ "content": "</s>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "3": {
30
+ "content": "<MASK|LLM-jp>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "4": {
38
+ "content": "<PAD|LLM-jp>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "5": {
46
+ "content": "<CLS|LLM-jp>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "6": {
54
+ "content": "<SEP|LLM-jp>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "7": {
62
+ "content": "<EOD|LLM-jp>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ }
69
+ },
70
+ "bos_token": "<s>",
71
+ "clean_up_tokenization_spaces": false,
72
+ "cls_token": "<CLS|LLM-jp>",
73
+ "eod_token": "</s>",
74
+ "eos_token": "</s>",
75
+ "extra_ids": 0,
76
+ "mask_token": "<MASK|LLM-jp>",
77
+ "model_max_length": 1000000000000000019884624838656,
78
+ "pad_token": "<PAD|LLM-jp>",
79
+ "sep_token": "<SEP|LLM-jp>",
80
+ "sp_model_kwargs": {},
81
+ "tokenizer_class": "PreTrainedTokenizerFast",
82
+ "unk_token": "<unk>"
83
+ }