snowclipsed committed
Commit 1db79dd · Parent: 8cb4b5e

add group size as a parameter

Files changed (3)
  1. layers.py +6 -5
  2. moondream.py +6 -4
  3. text.py +4 -4
layers.py CHANGED

```diff
@@ -36,26 +36,27 @@ class QuantizedLinear(nn.Module):
         self,
         in_features: int,
         out_features: int,
+        group_size: int = 128,
         dtype: torch.dtype = torch.uint8,
     ):
-        # TODO: Take group_size as an input instead of hardcoding it here.
         super().__init__()
         self.in_features = in_features
         self.out_features = out_features
+        self.group_size = group_size
         self.weight = nn.ParameterDict(
             {
                 "packed": nn.Parameter(
                     torch.empty(
-                        out_features * in_features // (128 * 2), 128, dtype=dtype
+                        out_features * in_features // (group_size * 2), group_size, dtype=dtype
                     ),
                     requires_grad=False,
                 ),
                 "scale": nn.Parameter(
-                    torch.empty(out_features * in_features // 128, 1),
+                    torch.empty(out_features * in_features // group_size, 1),
                     requires_grad=False,
                 ),
                 "zero_point": nn.Parameter(
-                    torch.empty(out_features * in_features // 128, 1),
+                    torch.empty(out_features * in_features // group_size, 1),
                     requires_grad=False,
                 ),
             }
@@ -86,7 +87,7 @@ class QuantizedLinear(nn.Module):
         )

         del self.weight, self.bias
-        quantize_(self, int4_weight_only(group_size=128))
+        quantize_(self, int4_weight_only(group_size=self.group_size))
         self.unpacked = True
         torch.cuda.empty_cache()
```
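The parameter shapes above encode the group-wise int4 layout: each run of group_size consecutive weights shares one scale and one zero point, and two 4-bit codes fit in each uint8 byte, which is where out_features * in_features // (group_size * 2) comes from. A minimal sketch of that layout follows; it is our own illustration, not the repo's packing, which is delegated to torchao's int4_weight_only.

```python
import torch

def pack_int4_groupwise(w: torch.Tensor, group_size: int = 128):
    # Flatten into groups of `group_size`; each group gets one scale/zero_point.
    out_features, in_features = w.shape
    assert (out_features * in_features) % (group_size * 2) == 0
    groups = w.reshape(-1, group_size)
    w_min = groups.min(dim=1, keepdim=True).values
    w_max = groups.max(dim=1, keepdim=True).values
    scale = ((w_max - w_min) / 15.0).clamp(min=1e-8)  # 4-bit codes span 0..15
    zero_point = w_min
    q = ((groups - zero_point) / scale).round().clamp(0, 15).to(torch.uint8)
    packed = (q[0::2] << 4) | q[1::2]  # two 4-bit codes per uint8 byte
    return packed, scale, zero_point

def unpack_int4_groupwise(packed, scale, zero_point, shape):
    # Split each byte back into its two 4-bit codes and dequantize per group.
    q = torch.empty(packed.shape[0] * 2, packed.shape[1])
    q[0::2] = (packed >> 4).float()
    q[1::2] = (packed & 0x0F).float()
    return (q * scale + zero_point).reshape(shape)

w = torch.randn(64, 256)
packed, scale, zp = pack_int4_groupwise(w, group_size=128)
assert packed.shape == (64 * 256 // (128 * 2), 128)  # matches "packed" above
assert scale.shape == (64 * 256 // 128, 1)           # matches "scale" above
print((w - unpack_int4_groupwise(packed, scale, zp, w.shape)).abs().max())
```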
moondream.py CHANGED

```diff
@@ -80,31 +80,33 @@ class MoondreamModel(nn.Module):
         self.region = nn.ModuleDict(
             {
                 "coord_encoder": QuantizedLinear(
-                    config.region.coord_feat_dim, config.region.dim, dtype=dtype
+                    config.region.coord_feat_dim, config.region.dim, group_size=config.text.group_size, dtype=dtype
                 ),
                 "coord_decoder": nn.ModuleDict(
                     {
                         "fc1": QuantizedLinear(
-                            config.region.dim, config.region.inner_dim, dtype=dtype
+                            config.region.dim, config.region.inner_dim, group_size=config.text.group_size, dtype=dtype
                         ),
                         "fc2": QuantizedLinear(
                             config.region.inner_dim,
                             config.region.coord_out_dim,
+                            group_size=config.text.group_size,
                             dtype=dtype,
                         ),
                     }
                 ),
                 "size_encoder": QuantizedLinear(
-                    config.region.size_feat_dim, config.region.dim, dtype=dtype
+                    config.region.size_feat_dim, config.region.dim, group_size=config.text.group_size, dtype=dtype
                 ),
                 "size_decoder": nn.ModuleDict(
                     {
                         "fc1": QuantizedLinear(
-                            config.region.dim, config.region.inner_dim, dtype=dtype
+                            config.region.dim, config.region.inner_dim, group_size=config.text.group_size, dtype=dtype
                         ),
                         "fc2": QuantizedLinear(
                             config.region.inner_dim,
                             config.region.size_out_dim,
+                            group_size=config.text.group_size,
                             dtype=dtype,
                         ),
                     }
```
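Threading the text model's group size through the region heads makes quantization granularity a single config knob. The trade-off it controls: smaller groups mean finer-grained scales (usually better accuracy), but the scale and zero_point buffers each grow as out_features * in_features / group_size. A back-of-envelope helper (ours, not repo code; it assumes the default float32 of the torch.empty calls above for scale and zero_point):

```python
def quantized_linear_bytes(in_features: int, out_features: int,
                           group_size: int = 128) -> dict:
    n = out_features * in_features
    packed = n // 2                # two int4 weights per uint8 byte
    n_groups = n // group_size
    metadata = 2 * n_groups * 4    # one scale + one zero_point per group, float32
    return {"packed": packed, "metadata": metadata, "total": packed + metadata}

# A hypothetical 2048x2048 projection: halving group_size doubles the metadata.
print(quantized_linear_bytes(2048, 2048, group_size=128))  # metadata: 262144
print(quantized_linear_bytes(2048, 2048, group_size=64))   # metadata: 524288
```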
text.py CHANGED

```diff
@@ -164,19 +164,19 @@ def build_text_model(config: TextConfig, dtype: torch.dtype = torch.float16) ->
                             "ln": nn.LayerNorm(config.dim, dtype=dtype),
                             "attn": nn.ModuleDict(
                                 {
-                                    "qkv": QuantizedLinear(config.dim, qkv_dim, dtype=dtype),
+                                    "qkv": QuantizedLinear(config.dim, qkv_dim, group_size=config.group_size, dtype=dtype),
                                     "proj": QuantizedLinear(
-                                        config.dim, config.dim, dtype=dtype
+                                        config.dim, config.dim, group_size=config.group_size, dtype=dtype
                                     ),
                                 }
                             ),
                             "mlp": nn.ModuleDict(
                                 {
                                     "fc1": QuantizedLinear(
-                                        config.dim, config.ff_dim, dtype=dtype
+                                        config.dim, config.ff_dim, group_size=config.group_size, dtype=dtype
                                     ),
                                     "fc2": QuantizedLinear(
-                                        config.ff_dim, config.dim, dtype=dtype
+                                        config.ff_dim, config.dim, group_size=config.group_size, dtype=dtype
                                     ),
                                 }
                             ),
```
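Note that inside build_text_model the config argument is already a TextConfig, so the group size is read directly as config.group_size. The call this commit parameterizes is torchao's weight-only int4 path, where group_size sets how many weights share one quantization scale. A standalone usage sketch (the layer shape and group size are arbitrary, and to our understanding torchao's int4 kernels expect bfloat16 weights on CUDA):

```python
import torch
import torch.nn as nn
from torchao.quantization import quantize_, int4_weight_only

# Hypothetical layer; group_size must divide in_features evenly.
layer = nn.Linear(2048, 2048, bias=False, dtype=torch.bfloat16, device="cuda")
quantize_(layer, int4_weight_only(group_size=64))  # the now-tunable knob
x = torch.randn(1, 2048, dtype=torch.bfloat16, device="cuda")
print(layer(x).shape)  # torch.Size([1, 2048])
```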