snowclipsed
committed on
Commit
·
1db79dd
1
Parent(s):
8cb4b5e
add group size as a parameter
Browse files
- layers.py +6 -5
- moondream.py +6 -4
- text.py +4 -4
layers.py
CHANGED
@@ -36,26 +36,27 @@ class QuantizedLinear(nn.Module):
|
|
36 |
self,
|
37 |
in_features: int,
|
38 |
out_features: int,
|
|
|
39 |
dtype: torch.dtype = torch.uint8,
|
40 |
):
|
41 |
-
# TODO: Take group_size as an input instead of hardcoding it here.
|
42 |
super().__init__()
|
43 |
self.in_features = in_features
|
44 |
self.out_features = out_features
|
|
|
45 |
self.weight = nn.ParameterDict(
|
46 |
{
|
47 |
"packed": nn.Parameter(
|
48 |
torch.empty(
|
49 |
-
out_features * in_features // (128 * 2), 128, dtype=dtype
|
50 |
),
|
51 |
requires_grad=False,
|
52 |
),
|
53 |
"scale": nn.Parameter(
|
54 |
-
torch.empty(out_features * in_features // 128, 1),
|
55 |
requires_grad=False,
|
56 |
),
|
57 |
"zero_point": nn.Parameter(
|
58 |
-
torch.empty(out_features * in_features // 128, 1),
|
59 |
requires_grad=False,
|
60 |
),
|
61 |
}
|
@@ -86,7 +87,7 @@ class QuantizedLinear(nn.Module):
|
|
86 |
)
|
87 |
|
88 |
del self.weight, self.bias
|
89 |
-
quantize_(self, int4_weight_only(group_size=128))
|
90 |
self.unpacked = True
|
91 |
torch.cuda.empty_cache()
|
92 |
|
|
|
36 |
self,
|
37 |
in_features: int,
|
38 |
out_features: int,
|
39 |
+
group_size: int = 128,
|
40 |
dtype: torch.dtype = torch.uint8,
|
41 |
):
|
|
|
42 |
super().__init__()
|
43 |
self.in_features = in_features
|
44 |
self.out_features = out_features
|
45 |
+
self.group_size = group_size
|
46 |
self.weight = nn.ParameterDict(
|
47 |
{
|
48 |
"packed": nn.Parameter(
|
49 |
torch.empty(
|
50 |
+
out_features * in_features // (group_size * 2), group_size, dtype=dtype
|
51 |
),
|
52 |
requires_grad=False,
|
53 |
),
|
54 |
"scale": nn.Parameter(
|
55 |
+
torch.empty(out_features * in_features // group_size, 1),
|
56 |
requires_grad=False,
|
57 |
),
|
58 |
"zero_point": nn.Parameter(
|
59 |
+
torch.empty(out_features * in_features // group_size, 1),
|
60 |
requires_grad=False,
|
61 |
),
|
62 |
}
|
|
|
87 |
)
|
88 |
|
89 |
del self.weight, self.bias
|
90 |
+
quantize_(self, int4_weight_only(group_size=self.group_size))
|
91 |
self.unpacked = True
|
92 |
torch.cuda.empty_cache()
|
93 |
|
moondream.py
CHANGED
@@ -80,31 +80,33 @@ class MoondreamModel(nn.Module):
|
|
80 |
self.region = nn.ModuleDict(
|
81 |
{
|
82 |
"coord_encoder": QuantizedLinear(
|
83 |
-
config.region.coord_feat_dim, config.region.dim, dtype=dtype
|
84 |
),
|
85 |
"coord_decoder": nn.ModuleDict(
|
86 |
{
|
87 |
"fc1": QuantizedLinear(
|
88 |
-
config.region.dim, config.region.inner_dim, dtype=dtype
|
89 |
),
|
90 |
"fc2": QuantizedLinear(
|
91 |
config.region.inner_dim,
|
92 |
config.region.coord_out_dim,
|
|
|
93 |
dtype=dtype,
|
94 |
),
|
95 |
}
|
96 |
),
|
97 |
"size_encoder": QuantizedLinear(
|
98 |
-
config.region.size_feat_dim, config.region.dim, dtype=dtype
|
99 |
),
|
100 |
"size_decoder": nn.ModuleDict(
|
101 |
{
|
102 |
"fc1": QuantizedLinear(
|
103 |
-
config.region.dim, config.region.inner_dim, dtype=dtype
|
104 |
),
|
105 |
"fc2": QuantizedLinear(
|
106 |
config.region.inner_dim,
|
107 |
config.region.size_out_dim,
|
|
|
108 |
dtype=dtype,
|
109 |
),
|
110 |
}
|
|
|
80 |
self.region = nn.ModuleDict(
|
81 |
{
|
82 |
"coord_encoder": QuantizedLinear(
|
83 |
+
config.region.coord_feat_dim, config.region.dim, group_size=config.text.group_size, dtype=dtype
|
84 |
),
|
85 |
"coord_decoder": nn.ModuleDict(
|
86 |
{
|
87 |
"fc1": QuantizedLinear(
|
88 |
+
config.region.dim, config.region.inner_dim, group_size=config.text.group_size, dtype=dtype
|
89 |
),
|
90 |
"fc2": QuantizedLinear(
|
91 |
config.region.inner_dim,
|
92 |
config.region.coord_out_dim,
|
93 |
+
group_size=config.text.group_size,
|
94 |
dtype=dtype,
|
95 |
),
|
96 |
}
|
97 |
),
|
98 |
"size_encoder": QuantizedLinear(
|
99 |
+
config.region.size_feat_dim, config.region.dim, group_size=config.text.group_size, dtype=dtype
|
100 |
),
|
101 |
"size_decoder": nn.ModuleDict(
|
102 |
{
|
103 |
"fc1": QuantizedLinear(
|
104 |
+
config.region.dim, config.region.inner_dim, group_size=config.text.group_size, dtype=dtype
|
105 |
),
|
106 |
"fc2": QuantizedLinear(
|
107 |
config.region.inner_dim,
|
108 |
config.region.size_out_dim,
|
109 |
+
group_size=config.text.group_size,
|
110 |
dtype=dtype,
|
111 |
),
|
112 |
}
|
text.py
CHANGED
@@ -164,19 +164,19 @@ def build_text_model(config: TextConfig, dtype: torch.dtype = torch.float16) ->
|
|
164 |
"ln": nn.LayerNorm(config.dim, dtype=dtype),
|
165 |
"attn": nn.ModuleDict(
|
166 |
{
|
167 |
-
"qkv": QuantizedLinear(config.dim, qkv_dim, dtype=dtype),
|
168 |
"proj": QuantizedLinear(
|
169 |
-
config.dim, config.dim, dtype=dtype
|
170 |
),
|
171 |
}
|
172 |
),
|
173 |
"mlp": nn.ModuleDict(
|
174 |
{
|
175 |
"fc1": QuantizedLinear(
|
176 |
-
config.dim, config.ff_dim, dtype=dtype
|
177 |
),
|
178 |
"fc2": QuantizedLinear(
|
179 |
-
config.ff_dim, config.dim, dtype=dtype
|
180 |
),
|
181 |
}
|
182 |
),
|
|
|
164 |
"ln": nn.LayerNorm(config.dim, dtype=dtype),
|
165 |
"attn": nn.ModuleDict(
|
166 |
{
|
167 |
+
"qkv": QuantizedLinear(config.dim, qkv_dim, group_size=config.text.group_size, dtype=dtype),
|
168 |
"proj": QuantizedLinear(
|
169 |
+
config.dim, config.dim, group_size=config.text.group_size, dtype=dtype
|
170 |
),
|
171 |
}
|
172 |
),
|
173 |
"mlp": nn.ModuleDict(
|
174 |
{
|
175 |
"fc1": QuantizedLinear(
|
176 |
+
config.dim, config.ff_dim, group_size=config.text.group_size, dtype=dtype
|
177 |
),
|
178 |
"fc2": QuantizedLinear(
|
179 |
+
config.ff_dim, config.dim, group_size=config.text.group_size, dtype=dtype
|
180 |
),
|
181 |
}
|
182 |
),
|