ubergarm committed on
Commit
d968e06
·
1 Parent(s): 1ae325a

fixup IQ4_KSS recipe

Browse files
Files changed (1) hide show
  1. README.md +11 -3
README.md CHANGED
@@ -135,9 +135,17 @@ blk\..*\.ffn_down_shexp\.weight=q6_0
135
  blk\..*\.ffn_(gate|up)_shexp\.weight=iq5_ks
136
 
137
  # Routed Experts Layers [1-46]
138
- blk\..*\.ffn_down_exps\.weight=iq4_xs
 
 
 
139
  blk\..*\.ffn_(gate|up)_exps\.weight=iq4_kss
140
 
 
 
 
 
 
141
  # Non-Repeating Layers
142
  token_embd\.weight=iq4_k
143
  output\.weight=iq6_k
@@ -148,11 +156,11 @@ custom=$(
148
  sed -Ez 's:\n+:,:g;s:,$::;s:^,::'
149
  )
150
 
151
- numactl -N 1 -m 1 \
152
  ./build/bin/llama-quantize \
153
  --custom-q "$custom" \
154
  --imatrix /mnt/raid/models/ubergarm/GLM-4.5-Air-GGUF/imatrix-GLM-4.5-Air-BF16.dat \
155
- /mnt/raid/models/ubergarm/GLM-4.5-Air-GGUF/GLM-4.5-Air-128x8.1B-BF16-00001-of-00005.gguf \
156
  /mnt/raid/models/ubergarm/GLM-4.5-Air-GGUF/GLM-4.5-Air-IQ4_KSS.gguf \
157
  IQ4_KSS \
158
  192
 
135
  blk\..*\.ffn_(gate|up)_shexp\.weight=iq5_ks
136
 
137
  # Routed Experts Layers [1-46]
138
+ #blk\.(1|46)\.ffn_down_exps\.weight=q8_0
139
+ #blk\.(1|46)\.ffn_(gate|up)_exps\.weight=q8_0
140
+
141
+ blk\..*\.ffn_down_exps\.weight=iq4_nl
142
  blk\..*\.ffn_(gate|up)_exps\.weight=iq4_kss
143
 
144
+ # NextN MTP Layer [46]
145
+ blk\..*\.nextn\.embed_tokens\.weight=iq5_ks
146
+ blk\..*\.nextn\.shared_head_head\.weight=iq5_ks
147
+ blk\..*\.nextn\.eh_proj\.weight=q8_0
148
+
149
  # Non-Repeating Layers
150
  token_embd\.weight=iq4_k
151
  output\.weight=iq6_k
 
156
  sed -Ez 's:\n+:,:g;s:,$::;s:^,::'
157
  )
158
 
159
+ numactl -N 0 -m 0 \
160
  ./build/bin/llama-quantize \
161
  --custom-q "$custom" \
162
  --imatrix /mnt/raid/models/ubergarm/GLM-4.5-Air-GGUF/imatrix-GLM-4.5-Air-BF16.dat \
163
+ /mnt/raid/models/ubergarm/GLM-4.5-Air-GGUF/GLM-4.5-Air-128x9.4B-BF16-00001-of-00005.gguf \
164
  /mnt/raid/models/ubergarm/GLM-4.5-Air-GGUF/GLM-4.5-Air-IQ4_KSS.gguf \
165
  IQ4_KSS \
166
  192