AngelSlim
/

Deepseek_r1_distill_qwen-32b_fp8_static

Safetensors

qwen2

compressed-tensors

Model card Files and versions

xet

Community

woodchen7 committed on Jul 22

Commit

2f64894

verified ·

1 Parent(s): 3398502

Upload config.json with huggingface_hub

Browse files

Files changed (1) hide show

config.json +56 -33

config.json CHANGED Viewed

@@ -1,35 +1,58 @@
 {
-  "architectures": [
-    "Qwen2ForCausalLM"
-  ],
-  "attention_dropout": 0.0,
-  "bos_token_id": 151643,
-  "eos_token_id": 151643,
-  "hidden_act": "silu",
-  "hidden_size": 5120,
-  "initializer_range": 0.02,
-  "intermediate_size": 27648,
-  "max_position_embeddings": 131072,
-  "max_window_layers": 64,
-  "model_type": "qwen2",
-  "num_attention_heads": 40,
-  "num_hidden_layers": 64,
-  "num_key_value_heads": 8,
-  "quantization_config": {
-    "activation_scheme": "static",
-    "ignored_layers": [
-      "lm_head"
     ],
-    "quant_method": "fp8"
-  },
-  "rms_norm_eps": 1e-05,
-  "rope_scaling": null,
-  "rope_theta": 1000000.0,
-  "sliding_window": 131072,
-  "tie_word_embeddings": false,
-  "torch_dtype": "bfloat16",
-  "transformers_version": "4.51.3",
-  "use_cache": false,
-  "use_sliding_window": false,
-  "vocab_size": 152064
-}

 {
+    "architectures": [
+        "Qwen2ForCausalLM"
     ],
+    "attention_dropout": 0.0,
+    "bos_token_id": 151643,
+    "eos_token_id": 151643,
+    "hidden_act": "silu",
+    "hidden_size": 5120,
+    "initializer_range": 0.02,
+    "intermediate_size": 27648,
+    "max_position_embeddings": 131072,
+    "max_window_layers": 64,
+    "model_type": "qwen2",
+    "num_attention_heads": 40,
+    "num_hidden_layers": 64,
+    "num_key_value_heads": 8,
+    "quantization_config": {
+        "config_groups": {
+            "group_0": {
+                "input_activations": {
+                    "dynamic": false,
+                    "num_bits": 8,
+                    "strategy": "tensor",
+                    "type": "float"
+                },
+                "output_activations": null,
+                "targets": [
+                    "Linear"
+                ],
+                "weights": {
+                    "dynamic": false,
+                    "num_bits": 8,
+                    "strategy": "tensor",
+                    "type": "float"
+                }
+            }
+        },
+        "format": "naive-quantized",
+        "kv_cache_scheme": null,
+        "quant_method": "compressed-tensors",
+        "quantization_status": "compressed",
+        "ignored_layers": [
+            "lm_head",
+            "model.embed_tokens"
+        ]
+    },
+    "rms_norm_eps": 1e-05,
+    "rope_scaling": null,
+    "rope_theta": 1000000.0,
+    "sliding_window": 131072,
+    "tie_word_embeddings": false,
+    "torch_dtype": "bfloat16",
+    "transformers_version": "4.51.3",
+    "use_cache": false,
+    "use_sliding_window": false,
+    "vocab_size": 152064
+}