Add IQ4_KSS and IQ3_KS
Browse files
README.md
CHANGED
@@ -172,6 +172,55 @@ numactl -N 1 -m 1 \
|
|
172 |
</details>
|
173 |
|
174 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
175 |
## `IQ3_K` 106.644 GiB (3.897 BPW)
|
176 |
Final estimate: PPL = 4.4561 +/- 0.02657
|
177 |
|
@@ -217,6 +266,56 @@ numactl -N 1 -m 1 \
|
|
217 |
|
218 |
</details>
|
219 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
220 |
## `IQ2_KL` 81.866 GiB (2.991 BPW)
|
221 |
Final estimate: PPL = 4.7912 +/- 0.02910
|
222 |
|
|
|
172 |
</details>
|
173 |
|
174 |
|
175 |
+
## `IQ4_KSS` 115.085 GiB (4.205 BPW)
|
176 |
+
Final estimate: PPL = 4.4017 +/- 0.02614
|
177 |
+
|
178 |
+
<details>
|
179 |
+
|
180 |
+
This one is a little funky just for fun. Seems smort!
|
181 |
+
|
182 |
+
<summary>👈 Secret Recipe</summary>
|
183 |
+
|
184 |
+
```bash
|
185 |
+
#!/usr/bin/env bash
|
186 |
+
|
187 |
+
# Repeating Layers [0-93]
|
188 |
+
|
189 |
+
custom="
|
190 |
+
# Attention
|
191 |
+
blk\..*\.attn_q.*=iq6_k
|
192 |
+
blk\..*\.attn_k.*=q8_0
|
193 |
+
blk\..*\.attn_v.*=q8_0
|
194 |
+
blk\..*\.attn_output.*=iq6_k
|
195 |
+
|
196 |
+
# Routed Experts
|
197 |
+
blk\.(0|1|2|3)\.ffn_down_exps\.weight=iq5_ks
|
198 |
+
blk\.(0|1|2|3)\.ffn_(gate|up)_exps\.weight=iq4_ks
|
199 |
+
blk\..*\.ffn_down_exps\.weight=iq4_ks
|
200 |
+
blk\..*\.ffn_(gate|up)_exps\.weight=iq4_kss
|
201 |
+
|
202 |
+
# Token Embedding / Output
|
203 |
+
token_embd\.weight=iq4_k
|
204 |
+
output\.weight=iq6_k
|
205 |
+
"
|
206 |
+
|
207 |
+
custom=$(
|
208 |
+
echo "$custom" | grep -v '^#' | \
|
209 |
+
sed -Ez 's:\n+:,:g;s:,$::;s:^,::'
|
210 |
+
)
|
211 |
+
|
212 |
+
numactl -N 0 -m 0 \
|
213 |
+
./build/bin/llama-quantize \
|
214 |
+
--custom-q "$custom" \
|
215 |
+
--imatrix /mnt/raid/models/ubergarm/Qwen3-235B-A22B-Instruct-2507-GGUF/imatrix-Qwen3-235B-A22B-Instruct-2507-BF16.dat \
|
216 |
+
/mnt/raid/models/ubergarm/Qwen3-235B-A22B-Instruct-2507-GGUF/Qwen3-235B-A22B-Instruct-2507-BF16-00001-of-00010.gguf \
|
217 |
+
/mnt/raid/models/ubergarm/Qwen3-235B-A22B-Instruct-2507-GGUF/Qwen3-235B-A22B-Instruct-2507-IQ4_KSS.gguf \
|
218 |
+
IQ4_KSS \
|
219 |
+
192
|
220 |
+
```
|
221 |
+
|
222 |
+
</details>
|
223 |
+
|
224 |
## `IQ3_K` 106.644 GiB (3.897 BPW)
|
225 |
Final estimate: PPL = 4.4561 +/- 0.02657
|
226 |
|
|
|
266 |
|
267 |
</details>
|
268 |
|
269 |
+
## `IQ3_KS` 101.308 GiB (3.702 BPW)
|
270 |
+
Final estimate: PPL = 4.4915 +/- 0.02685
|
271 |
+
|
272 |
+
<details>
|
273 |
+
|
274 |
+
Another funky smort one!
|
275 |
+
|
276 |
+
<summary>👈 Secret Recipe</summary>
|
277 |
+
|
278 |
+
```bash
|
279 |
+
#!/usr/bin/env bash
|
280 |
+
|
281 |
+
# Repeating Layers [0-93]
|
282 |
+
|
283 |
+
custom="
|
284 |
+
# Attention
|
285 |
+
blk\..*\.attn_q.*=iq6_k
|
286 |
+
blk\..*\.attn_k.*=q8_0
|
287 |
+
blk\..*\.attn_v.*=q8_0
|
288 |
+
blk\..*\.attn_output.*=iq6_k
|
289 |
+
|
290 |
+
# Routed Experts
|
291 |
+
blk\.(0|1|2|3)\.ffn_down_exps\.weight=iq5_ks
|
292 |
+
blk\.(0|1|2|3)\.ffn_(gate|up)_exps\.weight=iq4_ks
|
293 |
+
blk\..*\.ffn_down_exps\.weight=iq4_ks
|
294 |
+
blk\..*\.ffn_(gate|up)_exps\.weight=iq3_ks
|
295 |
+
|
296 |
+
# Token Embedding / Output
|
297 |
+
token_embd\.weight=iq4_k
|
298 |
+
output\.weight=iq6_k
|
299 |
+
"
|
300 |
+
|
301 |
+
custom=$(
|
302 |
+
echo "$custom" | grep -v '^#' | \
|
303 |
+
sed -Ez 's:\n+:,:g;s:,$::;s:^,::'
|
304 |
+
)
|
305 |
+
|
306 |
+
numactl -N 0 -m 0 \
|
307 |
+
./build/bin/llama-quantize \
|
308 |
+
--custom-q "$custom" \
|
309 |
+
--imatrix /mnt/raid/models/ubergarm/Qwen3-235B-A22B-Instruct-2507-GGUF/imatrix-Qwen3-235B-A22B-Instruct-2507-BF16.dat \
|
310 |
+
/mnt/raid/models/ubergarm/Qwen3-235B-A22B-Instruct-2507-GGUF/Qwen3-235B-A22B-Instruct-2507-BF16-00001-of-00010.gguf \
|
311 |
+
/mnt/raid/models/ubergarm/Qwen3-235B-A22B-Instruct-2507-GGUF/Qwen3-235B-A22B-Instruct-2507-IQ3_KS.gguf \
|
312 |
+
IQ3_KS \
|
313 |
+
192
|
314 |
+
```
|
315 |
+
|
316 |
+
</details>
|
317 |
+
|
318 |
+
|
319 |
## `IQ2_KL` 81.866 GiB (2.991 BPW)
|
320 |
Final estimate: PPL = 4.7912 +/- 0.02910
|
321 |
|