snowclipsed committed · Commit e15c30f · Parent(s): 8f87bef

remove is_quantized completely

weights.py +17 -41
weights.py CHANGED
@@ -6,9 +6,6 @@ import re
 from contextlib import contextmanager
 from typing import Callable, List
 
-from .text import build_text_model
-from .config import TextConfig
-
 
 # Our custom linear has an module named linear, so we add linear to the name
 def add_linear_to_key(k: str) -> str:
@@ -46,7 +43,6 @@ def safetensors_open(safetensors_file: str):
 def _load_weights(
     get_tensor: Callable[[str], torch.Tensor],
     model: nn.Module,
-    is_quantized: bool = False,
 ) -> None:
     """Internal function to load weights using a tensor getter function."""
     model = model.to(dtype=torch.float16)
@@ -111,42 +107,23 @@ def _load_weights(
             }
         )
 
-    if not is_quantized:
-        for i in range(len(model.text["blocks"])):
-            prefix = f"text_model.transformer.h.{i}"
-            blk = model.text["blocks"][i]
-            weight_map.update(
-                {
-                    f"{prefix}.ln.weight": blk["ln"].weight,
-                    f"{prefix}.ln.bias": blk["ln"].bias,
-                    f"{prefix}.mixer.Wqkv.weight": blk["attn"]["qkv"].weight,
-                    f"{prefix}.mixer.Wqkv.bias": blk["attn"]["qkv"].bias,
-                    f"{prefix}.mixer.out_proj.weight": blk["attn"]["proj"].weight,
-                    f"{prefix}.mixer.out_proj.bias": blk["attn"]["proj"].bias,
-                    f"{prefix}.mlp.fc1.weight": blk["mlp"]["fc1"].weight,
-                    f"{prefix}.mlp.fc1.bias": blk["mlp"]["fc1"].bias,
-                    f"{prefix}.mlp.fc2.weight": blk["mlp"]["fc2"].weight,
-                    f"{prefix}.mlp.fc2.bias": blk["mlp"]["fc2"].bias,
-                }
-            )
-    else:  # add special quantized path. this is specific to how bitblas expects weights to be loaded (.qweight)
-        for i in range(len(model.text["blocks"])):
-            prefix = f"text_model.transformer.h.{i}"
-            blk = model.text["blocks"][i]
-            weight_map.update(
-                {
-                    f"{prefix}.ln.qweight": blk["ln"].weight,
-                    f"{prefix}.ln.bias": blk["ln"].bias,
-                    f"{prefix}.mixer.Wqkv.qweight": blk["attn"]["qkv"].weight,
-                    f"{prefix}.mixer.Wqkv.bias": blk["attn"]["qkv"].bias,
-                    f"{prefix}.mixer.out_proj.qweight": blk["attn"]["proj"].weight,
-                    f"{prefix}.mixer.out_proj.bias": blk["attn"]["proj"].bias,
-                    f"{prefix}.mlp.fc1.qweight": blk["mlp"]["fc1"].weight,
-                    f"{prefix}.mlp.fc1.bias": blk["mlp"]["fc1"].bias,
-                    f"{prefix}.mlp.fc2.qweight": blk["mlp"]["fc2"].weight,
-                    f"{prefix}.mlp.fc2.bias": blk["mlp"]["fc2"].bias,
-                }
-            )
+    for i in range(len(model.text["blocks"])):
+        prefix = f"text_model.transformer.h.{i}"
+        blk = model.text["blocks"][i]
+        weight_map.update(
+            {
+                f"{prefix}.ln.weight": blk["ln"].weight,
+                f"{prefix}.ln.bias": blk["ln"].bias,
+                f"{prefix}.mixer.Wqkv.weight": blk["attn"]["qkv"].weight,
+                f"{prefix}.mixer.Wqkv.bias": blk["attn"]["qkv"].bias,
+                f"{prefix}.mixer.out_proj.weight": blk["attn"]["proj"].weight,
+                f"{prefix}.mixer.out_proj.bias": blk["attn"]["proj"].bias,
+                f"{prefix}.mlp.fc1.weight": blk["mlp"]["fc1"].weight,
+                f"{prefix}.mlp.fc1.bias": blk["mlp"]["fc1"].bias,
+                f"{prefix}.mlp.fc2.weight": blk["mlp"]["fc2"].weight,
+                f"{prefix}.mlp.fc2.bias": blk["mlp"]["fc2"].bias,
+            }
+        )
 
     for key, tensor in weight_map.items():
         tensor.data.copy_(get_tensor(key))
@@ -175,7 +152,6 @@ def load_weights_from_safetensors(weights_file: str, model: nn.Module) -> None:
         _load_weights(
             lambda x: get_tensor(name_map[x]).to(dtype=torch.float16),
            model,
-            # is_quantized,
         )
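The following is a minimal, hedged sketch (the toy blocks and layer sizes are invented; only the key names come from this diff) of the weight map the now-unconditional block loop builds: plain .weight/.bias keys under text_model.transformer.h.{i}, with no .qweight variants left for bitblas-style quantized loading.

# Illustrative sketch, not the repository's code: toy stand-ins for model.text["blocks"]
# with made-up sizes, used only to show the key layout _load_weights now expects.
import torch.nn as nn

blocks = [
    {
        "ln": nn.LayerNorm(8),
        "attn": {"qkv": nn.Linear(8, 24), "proj": nn.Linear(8, 8)},
        "mlp": {"fc1": nn.Linear(8, 32), "fc2": nn.Linear(32, 8)},
    }
    for _ in range(2)
]

weight_map = {}
for i, blk in enumerate(blocks):
    prefix = f"text_model.transformer.h.{i}"
    weight_map.update(
        {
            f"{prefix}.ln.weight": blk["ln"].weight,
            f"{prefix}.ln.bias": blk["ln"].bias,
            f"{prefix}.mixer.Wqkv.weight": blk["attn"]["qkv"].weight,
            f"{prefix}.mixer.Wqkv.bias": blk["attn"]["qkv"].bias,
            f"{prefix}.mixer.out_proj.weight": blk["attn"]["proj"].weight,
            f"{prefix}.mixer.out_proj.bias": blk["attn"]["proj"].bias,
            f"{prefix}.mlp.fc1.weight": blk["mlp"]["fc1"].weight,
            f"{prefix}.mlp.fc1.bias": blk["mlp"]["fc1"].bias,
            f"{prefix}.mlp.fc2.weight": blk["mlp"]["fc2"].weight,
            f"{prefix}.mlp.fc2.bias": blk["mlp"]["fc2"].bias,
        }
    )

# After this commit there is no quantized branch, so no key should end in ".qweight".
assert not any(k.endswith(".qweight") for k in weight_map)
print(f"{len(weight_map)} keys mapped")  # 2 blocks x 10 entries = 20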