ubergarm committed
Commit b1c3767 · 1 Parent(s): c81859d

Add IQ2_KL

Files changed (1)
  1. README.md +11 -8
README.md CHANGED
@@ -298,10 +298,8 @@ custom=$(
 </details>
 
 
-## `IQ2_KL` 10.913 GiB (3.070 BPW)
-Final estimate: PPL = 7.7567 +/- 0.05475
-
-*NOTE* this test recipe may change before uploading!
+## `IQ2_KL` 11.516 GiB (3.240 BPW)
+Final estimate: PPL = 7.7121 +/- 0.05402
 
 <details>
 
@@ -314,14 +312,20 @@ custom="
 # 48 Repeating Layers [0-47]
 
 # Attention
+blk\.(0)\.attn_q.*=q8_0
+blk\.(0)\.attn_k.*=q8_0
+blk\.(0)\.attn_v.*=q8_0
+blk\.(0)\.attn_output.*=q8_0
+
 blk\..*\.attn_q.*=iq5_k
 blk\..*\.attn_k.*=iq6_k
 blk\..*\.attn_v.*=iq6_k
 blk\..*\.attn_output.*=iq5_k
 
 # Routed Experts
-blk\.(0|1|2|47)\.ffn_down_exps\.weight=iq4_ks
-blk\.(0|1|2|47)\.ffn_(gate|up)_exps\.weight=iq3_ks
+blk\.(0|47)\.ffn_down_exps\.weight=q8_0
+blk\.(0|47)\.ffn_(gate|up)_exps\.weight=q8_0
+
 blk\..*\.ffn_down_exps\.weight=iq3_ks
 blk\..*\.ffn_(gate|up)_exps\.weight=iq2_kl
 
@@ -335,10 +339,9 @@ custom=$(
   sed -Ez 's:\n+:,:g;s:,$::;s:^,::'
 )
 
-#--imatrix /mnt/raid/models/ubergarm/Qwen3-30B-A3B-Instruct-2507-GGUF/imatrix-Qwen3-30B-A3B-Instruct-2507-BF16.dat \
 ./build/bin/llama-quantize \
     --custom-q "$custom" \
-    --imatrix /mnt/raid/models/ubergarm/Qwen3-30B-A3B-Instruct-2507-GGUF/imatrix-Qwen3-30B-A3B-Instruct-2507-BF16.dat \
+    --imatrix /mnt/raid/models/ubergarm/Qwen3-30B-A3B-Instruct-2507-GGUF/imatrix-eaddario-combined-all-medium-Qwen3-30B-A3B-Instruct-2507-BF16.dat \
     /mnt/raid/models/ubergarm/Qwen3-30B-A3B-Instruct-2507-GGUF/Qwen3-30B-A3B-Instruct-2507-BF16-00001-of-00002.gguf \
     /mnt/raid/models/ubergarm/Qwen3-30B-A3B-Instruct-2507-GGUF/Qwen3-30B-A3B-Instruct-2507-IQ2_KL.gguf \
     IQ2_KL \
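
For context on the `custom="..."` / `custom=$( ... )` lines in the hunks above: the newline-separated `regex=quant` rules are flattened into the single comma-separated string that `--custom-q` takes, via the `sed` one-liner shown in the diff. Below is a minimal sketch of that plumbing; only the `sed` expression is verbatim from the diff, and the `echo`/`grep -v '^#'` comment-stripping step is an assumption. Note the ordering in the recipe: the specific `blk\.(0|...)` overrides are listed ahead of the catch-all `blk\..*` rules.

```bash
# Hypothetical sketch of how the rule list is assembled (subset of rules shown).
custom="
# Attention
blk\.(0)\.attn_q.*=q8_0
blk\..*\.attn_q.*=iq5_k

# Routed Experts
blk\.(0|47)\.ffn_down_exps\.weight=q8_0
blk\..*\.ffn_down_exps\.weight=iq3_ks
"

custom=$(
  # Drop comment lines (assumed step), then collapse runs of newlines into
  # commas and trim any leading/trailing comma, yielding one rule string.
  echo "$custom" | grep -v '^#' | sed -Ez 's:\n+:,:g;s:,$::;s:^,::'
)

echo "$custom"
# blk\.(0)\.attn_q.*=q8_0,blk\..*\.attn_q.*=iq5_k,blk\.(0|47)\.ffn_down_exps\.weight=q8_0,blk\..*\.ffn_down_exps\.weight=iq3_ks
```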
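
The `Final estimate: PPL = ...` figure in the updated heading is the closing line printed by `llama-perplexity`. A minimal sketch of such a run is below; the test corpus and any extra runtime flags used for this commit are not recorded in the diff, so treat them as placeholders.

```bash
# Hypothetical perplexity check; wiki.test.raw is an assumed test corpus,
# not something recorded in this commit.
./build/bin/llama-perplexity \
    -m /mnt/raid/models/ubergarm/Qwen3-30B-A3B-Instruct-2507-GGUF/Qwen3-30B-A3B-Instruct-2507-IQ2_KL.gguf \
    -f wiki.test.raw
# ...
# Final estimate: PPL = <value> +/- <error>
```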