Commit 595a8a6 by snowclipsed
Parent: a89c592

remove dtype as input in weights

Files changed:
- layers.py +2 -2
- text.py +5 -6
- weights.py +2 -2
layers.py
CHANGED
@@ -36,7 +36,7 @@ class QuantizedLinear(nn.Module):
         self,
         in_features: int,
         out_features: int,
-        dtype: torch.dtype,
+        dtype: torch.dtype = torch.uint8,
     ):
         # TODO: Take group_size as an input instead of hardcoding it here.
         super().__init__()
@@ -46,7 +46,7 @@ class QuantizedLinear(nn.Module):
             {
                 "packed": nn.Parameter(
                     torch.empty(
-                        out_features * in_features // (128 * 2), 128, dtype=torch.uint8
+                        out_features * in_features // (128 * 2), 128, dtype=dtype
                     ),
                     requires_grad=False,
                 ),
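Not part of the commit, but a hedged note on what the layers.py change implies: a packed buffer of out_features * in_features // (128 * 2) rows by 128 bytes suggests two 4-bit values per uint8 byte with the hardcoded group size of 128 that the TODO mentions, which is why torch.uint8 is a reasonable default for dtype. The sketch below only illustrates that shape arithmetic; the nibble layout and the pack_int4 helper are assumptions, not the repo's actual quantization code.

import torch

def pack_int4(w_int4: torch.Tensor) -> torch.Tensor:
    # Hypothetical helper: pack pairs of 4-bit values (0..15) into single uint8 bytes,
    # then view the result as rows of 128 bytes (the hardcoded group size).
    flat = w_int4.to(torch.uint8).flatten()
    packed = flat[0::2] | (flat[1::2] << 4)
    return packed.reshape(-1, 128)

out_features, in_features = 2048, 2048
w = torch.randint(0, 16, (out_features, in_features), dtype=torch.uint8)
packed = pack_int4(w)

# Matches the buffer QuantizedLinear allocates, now uint8 by default.
assert packed.shape == (out_features * in_features // (128 * 2), 128)
assert packed.dtype == torch.uint8

# Unpacking recovers both nibbles of every byte.
low, high = packed & 0xF, packed >> 4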
text.py
CHANGED
@@ -152,9 +152,8 @@ def _lm_head(hidden_BTC: torch.Tensor, w: nn.Module):
     return logits


-def build_text_model(config: TextConfig, dtype: torch.dtype) -> nn.Module:
+def build_text_model(config: TextConfig, dtype: torch.dtype = torch.float16) -> nn.Module:
     qkv_dim = int(config.dim * (1 + 2 * config.n_kv_heads / config.n_heads))
-    linear_cls = QuantizedLinear if config.group_size is not None else nn.Linear

     text = nn.ModuleDict(
         {
@@ -165,18 +164,18 @@ def build_text_model(config: TextConfig, dtype: torch.dtype) -> nn.Module:
                             "ln": nn.LayerNorm(config.dim, dtype=dtype),
                             "attn": nn.ModuleDict(
                                 {
-                                    "qkv": linear_cls(config.dim, qkv_dim, dtype=dtype),
-                                    "proj": linear_cls(
+                                    "qkv": QuantizedLinear(config.dim, qkv_dim, dtype=dtype),
+                                    "proj": QuantizedLinear(
                                         config.dim, config.dim, dtype=dtype
                                     ),
                                 }
                             ),
                             "mlp": nn.ModuleDict(
                                 {
-                                    "fc1": linear_cls(
+                                    "fc1": QuantizedLinear(
                                         config.dim, config.ff_dim, dtype=dtype
                                     ),
-                                    "fc2": linear_cls(
+                                    "fc2": QuantizedLinear(
                                         config.ff_dim, config.dim, dtype=dtype
                                     ),
                                 }
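Likewise not in the diff, just a hedged usage sketch of the new text.py signature: with the linear_cls switch removed, build_text_model always constructs QuantizedLinear layers, and the dtype argument can now be omitted. The import path and helper name below are assumptions about the module layout, not code from the repo.

import torch
from text import build_text_model  # assumes text.py is importable as the module `text`

def build_default_text(config):
    # `config` is a TextConfig built elsewhere; its fields are not shown in this diff.
    # After this commit the dtype argument is optional, so this call is equivalent to
    # build_text_model(config, dtype=torch.float16).
    return build_text_model(config)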
weights.py
CHANGED
@@ -175,7 +175,7 @@ def load_weights_from_safetensors(weights_file: str, model: nn.Module) -> None:
     linear_dtype = torch.int8 if is_quantized else torch.float16

     model.text = build_text_model(
-        TextConfig
+        TextConfig
     )
     if model.setup_caches_flag:
         model._setup_caches()
@@ -212,7 +212,7 @@ def load_weights_from_pt(weights_file: str, model: nn.Module) -> None:

     linear_dtype = torch.int8 if is_quantized else torch.float16
     model.text = build_text_model(
-        TextConfig
+        TextConfig
     )
     if model.setup_caches_flag:
         model._setup_caches()
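Finally, a hedged sanity check consistent with the commit title (weights.py apparently no longer forwards linear_dtype into build_text_model): the "packed" parameters created by QuantizedLinear should then fall back to the torch.uint8 default. The helper below is illustrative only; the parameter naming is inferred from the layers.py hunk above.

import torch

def assert_uint8_packed(text_module) -> None:
    # Scan registered parameters for the "packed" buffers created by QuantizedLinear
    # (registered with requires_grad=False) and confirm they use the uint8 default.
    found = 0
    for name, param in text_module.named_parameters():
        if name.endswith("packed"):
            assert param.dtype == torch.uint8, (name, param.dtype)
            found += 1
    assert found > 0, "no packed buffers found; is this a quantized text model?"

# e.g. after loading: assert_uint8_packed(model.text)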