fix: MLP layer names.
modeling_falcon.py (+4 −4)
```diff
@@ -763,18 +763,18 @@ class FalconMLP(nn.Module):
         super().__init__()
         hidden_size = config.hidden_size
 
-        self.… = FalconLinear(
+        self.dense_h_to_4h = FalconLinear(
             hidden_size, config.ff_factor * hidden_size, bias=config.bias
         )
         self.act = nn.GELU()
-        self.… = FalconLinear(
+        self.dense_4h_to_h = FalconLinear(
             config.ff_factor * hidden_size, hidden_size, bias=config.bias
         )
         self.hidden_dropout = config.hidden_dropout
 
     def forward(self, x: torch.Tensor) -> torch.Tensor:
-        x = self.act(self.…(x))
-        x = self.…(x)
+        x = self.act(self.dense_h_to_4h(x))
+        x = self.dense_4h_to_h(x)
         return x
 
 FALCON_ATTENTION_CLASSES = {
```
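For context, here is a minimal runnable sketch of `FalconMLP` as it stands after this commit. `FalconLinear` is stubbed as a plain `nn.Linear` subclass and the config is reduced to the four fields the MLP reads; both are illustrative stand-ins, not the repo's real definitions. The sketch shows why the rename matters: an `nn.Module` attribute name becomes the module's `state_dict` key, so it has to match the weight names stored in the checkpoint.

```python
from dataclasses import dataclass

import torch
import torch.nn as nn


class FalconLinear(nn.Linear):
    # Stand-in: the real class customizes forward(); shapes are identical.
    pass


@dataclass
class FalconConfig:
    # Stand-in config with illustrative defaults, not the released values.
    hidden_size: int = 64
    ff_factor: int = 4
    bias: bool = False
    hidden_dropout: float = 0.0


class FalconMLP(nn.Module):
    def __init__(self, config: FalconConfig):
        super().__init__()
        hidden_size = config.hidden_size

        # The attribute names below are what this commit fixes: they become
        # the state_dict keys and must match the checkpoint's weight names.
        self.dense_h_to_4h = FalconLinear(
            hidden_size, config.ff_factor * hidden_size, bias=config.bias
        )
        self.act = nn.GELU()
        self.dense_4h_to_h = FalconLinear(
            config.ff_factor * hidden_size, hidden_size, bias=config.bias
        )
        self.hidden_dropout = config.hidden_dropout

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Expand hidden_size -> ff_factor * hidden_size, apply GELU, project back.
        x = self.act(self.dense_h_to_4h(x))
        x = self.dense_4h_to_h(x)
        return x


mlp = FalconMLP(FalconConfig())
print(sorted(mlp.state_dict().keys()))
# ['dense_4h_to_h.weight', 'dense_h_to_4h.weight']  (no bias keys with bias=False)
print(mlp(torch.randn(2, 64)).shape)  # torch.Size([2, 64])
```

Checkpoint tensors stored under `mlp.dense_h_to_4h.*` and `mlp.dense_4h_to_h.*` only load cleanly when the attributes carry those exact names; anything else surfaces as missing/unexpected keys in `load_state_dict`.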