woodchen7 committed on
Commit
cba5f50
·
verified ·
1 Parent(s): ab14604

Upload hf_quant_config.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. hf_quant_config.json +10 -0
hf_quant_config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "quantization": {
3
+ "quant_algo": "FP8",
4
+ "kv_cache_quant_algo": null,
5
+ "exclude_modules": [
6
+ "lm_head",
7
+ "model.embed_tokens"
8
+ ]
9
+ }
10
+ }