woodchen7 committed on
Commit
be2f791
·
verified ·
1 Parent(s): 3714224

Upload hf_quant_config.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. hf_quant_config.json +10 -0
hf_quant_config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "quantization": {
3
+ "quant_algo": "FP8",
4
+ "kv_cache_quant_algo": null,
5
+ "exclude_modules": [
6
+ "lm_head",
7
+ "model.embed_tokens"
8
+ ]
9
+ }
10
+ }