fixup IQ4_KSS recipe
README.md (changed)

@@ -135,9 +135,17 @@ blk\..*\.ffn_down_shexp\.weight=q6_0
 blk\..*\.ffn_(gate|up)_shexp\.weight=iq5_ks
 
 # Routed Experts Layers [1-46]
-blk
+#blk\.(1|46)\.ffn_down_exps\.weight=q8_0
+#blk\.(1|46)\.ffn_(gate|up)_exps\.weight=q8_0
+
+blk\..*\.ffn_down_exps\.weight=iq4_nl
 blk\..*\.ffn_(gate|up)_exps\.weight=iq4_kss
 
+# NextN MTP Layer [46]
+blk\..*\.nextn\.embed_tokens\.weight=iq5_ks
+blk\..*\.nextn\.shared_head_head\.weight=iq5_ks
+blk\..*\.nextn\.eh_proj\.weight=q8_0
+
 # Non-Repeating Layers
 token_embd\.weight=iq4_k
 output\.weight=iq6_k
@@ -148,11 +156,11 @@ custom=$(
   sed -Ez 's:\n+:,:g;s:,$::;s:^,::'
 )
 
-numactl -N
+numactl -N 0 -m 0 \
 ./build/bin/llama-quantize \
     --custom-q "$custom" \
     --imatrix /mnt/raid/models/ubergarm/GLM-4.5-Air-GGUF/imatrix-GLM-4.5-Air-BF16.dat \
-    /mnt/raid/models/ubergarm/GLM-4.5-Air-GGUF/GLM-4.5-Air-
+    /mnt/raid/models/ubergarm/GLM-4.5-Air-GGUF/GLM-4.5-Air-128x9.4B-BF16-00001-of-00005.gguf \
     /mnt/raid/models/ubergarm/GLM-4.5-Air-GGUF/GLM-4.5-Air-IQ4_KSS.gguf \
     IQ4_KSS \
     192
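
For readers following along: `--custom-q` takes a single comma-separated string of `tensor-regex=quant` overrides, and the `sed -Ez 's:\n+:,:g;s:,$::;s:^,::'` line kept as context in the second hunk is what flattens the multi-line recipe above into that string. Below is a minimal, self-contained sketch of that plumbing; the `recipe` variable name and the `grep -v '^#'` comment filter are assumptions for illustration, only the `sed` expression is taken from the diff.

```bash
#!/usr/bin/env bash
# Sketch only: the variable name and the comment filter are illustrative
# assumptions; the sed expression is the one shown in the diff.

recipe='
# Routed Experts Layers [1-46]
blk\..*\.ffn_down_exps\.weight=iq4_nl
blk\..*\.ffn_(gate|up)_exps\.weight=iq4_kss

# Non-Repeating Layers
token_embd\.weight=iq4_k
output\.weight=iq6_k
'

# Drop comment lines, squash runs of newlines into commas, then strip any
# leading/trailing comma (GNU sed; -z treats the input as one NUL-separated record).
custom=$(echo "$recipe" | grep -v '^#' | sed -Ez 's:\n+:,:g;s:,$::;s:^,::')

echo "$custom"
# blk\..*\.ffn_down_exps\.weight=iq4_nl,blk\..*\.ffn_(gate|up)_exps\.weight=iq4_kss,token_embd\.weight=iq4_k,output\.weight=iq6_k
```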
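
The command-side changes are straightforward: `numactl -N 0 -m 0 \` binds both the quantization threads (`-N`, cpunodebind) and their memory allocations (`-m`, membind) to NUMA node 0, the new first positional argument points `llama-quantize` at the first shard of the BF16 source GGUF, and the trailing `192` is the optional thread-count argument. If you are adapting this to other hardware, these generic commands (not part of the README's script) show what to bind to and how many threads are available:

```bash
numactl --hardware   # NUMA nodes, the CPUs in each node, per-node memory
nproc                # logical CPUs available to the current shell
```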