Add IQ2_KL
Browse files
README.md
CHANGED
@@ -298,10 +298,8 @@ custom=$(
|
|
298 |
</details>
|
299 |
|
300 |
|
301 |
-
## `IQ2_KL`
|
302 |
-
Final estimate: PPL = 7.… [remaining digits of the old value were lost in this diff rendering]
|
303 |
-
|
304 |
-
*NOTE* this test recipe may change before uploading!
|
305 |
|
306 |
<details>
|
307 |
|
@@ -314,14 +312,20 @@ custom="
|
|
314 |
# 48 Repeating Layers [0-47]
|
315 |
|
316 |
# Attention
|
|
|
|
|
|
|
|
|
|
|
317 |
blk\..*\.attn_q.*=iq5_k
|
318 |
blk\..*\.attn_k.*=iq6_k
|
319 |
blk\..*\.attn_v.*=iq6_k
|
320 |
blk\..*\.attn_output.*=iq5_k
|
321 |
|
322 |
# Routed Experts
|
323 |
-
blk\.(0|… [rest of the removed pattern was lost in this diff rendering]
|
324 |
-
blk\.(0|… [rest of the removed pattern was lost in this diff rendering]
|
|
|
325 |
blk\..*\.ffn_down_exps\.weight=iq3_ks
|
326 |
blk\..*\.ffn_(gate|up)_exps\.weight=iq2_kl
|
327 |
|
@@ -335,10 +339,9 @@ custom=$(
|
|
335 |
sed -Ez 's:\n+:,:g;s:,$::;s:^,::'
|
336 |
)
|
337 |
|
338 |
-
#--imatrix /mnt/raid/models/ubergarm/Qwen3-30B-A3B-Instruct-2507-GGUF/imatrix-Qwen3-30B-A3B-Instruct-2507-BF16.dat \
|
339 |
./build/bin/llama-quantize \
|
340 |
--custom-q "$custom" \
|
341 |
-
--imatrix /mnt/raid/models/ubergarm/Qwen3-30B-A3B-Instruct-2507-GGUF/imatrix-Qwen3-30B-A3B-Instruct-2507-BF16.dat \
|
342 |
/mnt/raid/models/ubergarm/Qwen3-30B-A3B-Instruct-2507-GGUF/Qwen3-30B-A3B-Instruct-2507-BF16-00001-of-00002.gguf \
|
343 |
/mnt/raid/models/ubergarm/Qwen3-30B-A3B-Instruct-2507-GGUF/Qwen3-30B-A3B-Instruct-2507-IQ2_KL.gguf \
|
344 |
IQ2_KL \
|
|
|
298 |
</details>
|
299 |
|
300 |
|
301 |
+
## `IQ2_KL` 11.516 GiB (3.240 BPW)
|
302 |
+
Final estimate: PPL = 7.7121 +/- 0.05402
|
|
|
|
|
303 |
|
304 |
<details>
|
305 |
|
|
|
312 |
# 48 Repeating Layers [0-47]
|
313 |
|
314 |
# Attention
|
315 |
+
blk\.(0)\.attn_q.*=q8_0
|
316 |
+
blk\.(0)\.attn_k.*=q8_0
|
317 |
+
blk\.(0)\.attn_v.*=q8_0
|
318 |
+
blk\.(0)\.attn_output.*=q8_0
|
319 |
+
|
320 |
blk\..*\.attn_q.*=iq5_k
|
321 |
blk\..*\.attn_k.*=iq6_k
|
322 |
blk\..*\.attn_v.*=iq6_k
|
323 |
blk\..*\.attn_output.*=iq5_k
|
324 |
|
325 |
# Routed Experts
|
326 |
+
blk\.(0|47)\.ffn_down_exps\.weight=q8_0
|
327 |
+
blk\.(0|47)\.ffn_(gate|up)_exps\.weight=q8_0
|
328 |
+
|
329 |
blk\..*\.ffn_down_exps\.weight=iq3_ks
|
330 |
blk\..*\.ffn_(gate|up)_exps\.weight=iq2_kl
|
331 |
|
|
|
339 |
sed -Ez 's:\n+:,:g;s:,$::;s:^,::'
|
340 |
)
|
341 |
|
|
|
342 |
./build/bin/llama-quantize \
|
343 |
--custom-q "$custom" \
|
344 |
+
--imatrix /mnt/raid/models/ubergarm/Qwen3-30B-A3B-Instruct-2507-GGUF/imatrix-eaddario-combined-all-medium-Qwen3-30B-A3B-Instruct-2507-BF16.dat \
|
345 |
/mnt/raid/models/ubergarm/Qwen3-30B-A3B-Instruct-2507-GGUF/Qwen3-30B-A3B-Instruct-2507-BF16-00001-of-00002.gguf \
|
346 |
/mnt/raid/models/ubergarm/Qwen3-30B-A3B-Instruct-2507-GGUF/Qwen3-30B-A3B-Instruct-2507-IQ2_KL.gguf \
|
347 |
IQ2_KL \
|