Create configuration_minGRULM.py
Browse files- configuration_minGRULM.py +30 -0
configuration_minGRULM.py
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from transformers import PretrainedConfig
|
2 |
+
|
3 |
+
class MinGRULMConfig(PretrainedConfig):
    """Hyperparameter container for the ``"mingru"`` model type.

    Each constructor argument is stored unchanged as an instance attribute
    of the same name. Any additional keyword arguments are forwarded to
    ``PretrainedConfig.__init__`` after the attributes have been set.

    Args:
        vocab_size: Stored as ``self.vocab_size`` (default 50257).
        d_model: Stored as ``self.d_model`` (default 512).
        ff_mult: Stored as ``self.ff_mult`` (default 4).
        min_gru_expansion: Stored as ``self.min_gru_expansion``
            (default 1.5).
        expand: Stored as ``self.expand`` (default 2.0).
        depth: Stored as ``self.depth`` (default 12).
        n_layer: Stored as ``self.n_layer`` (default 12).
        pad_vocab_size_multiple: Stored as
            ``self.pad_vocab_size_multiple`` (default 8).
        initializer_range: Stored as ``self.initializer_range``
            (default 0.02).
        hidden_size: Stored as ``self.hidden_size`` (default 512).
        **kwargs: Passed through to ``PretrainedConfig.__init__``.
    """

    model_type = "mingru"

    def __init__(
        self,
        vocab_size=50257,
        d_model=512,
        ff_mult=4,
        min_gru_expansion=1.5,
        expand=2.0,
        depth=12,
        n_layer=12,
        pad_vocab_size_multiple=8,
        initializer_range=0.02,
        hidden_size=512,
        **kwargs,
    ):
        # Vocabulary and width settings.
        # NOTE(review): d_model and hidden_size are independent values here
        # (both default to 512); presumably they describe the same width --
        # confirm which one the model code actually reads.
        self.vocab_size = vocab_size
        self.d_model = d_model

        # Expansion / multiplier factors.
        self.ff_mult = ff_mult
        self.min_gru_expansion = min_gru_expansion
        self.expand = expand

        # Layer counts.
        # NOTE(review): depth and n_layer are also stored independently
        # (both default to 12); verify against the model which is used.
        self.depth = depth
        self.n_layer = n_layer

        self.hidden_size = hidden_size
        self.pad_vocab_size_multiple = pad_vocab_size_multiple
        self.initializer_range = initializer_range

        # The base class is initialised last, with only the leftover kwargs.
        super().__init__(**kwargs)
|