FlashAttention support
Browse files
- configuration_gptbert.py +1 -49
configuration_gptbert.py
CHANGED
@@ -14,55 +14,7 @@ class GptBertConfig(PretrainedConfig):
|
|
14 |
**kwargs
|
15 |
):
|
16 |
super().__init__(**kwargs)
|
17 |
-
|
18 |
-
self.model: str
|
19 |
-
|
20 |
-
# General information
|
21 |
-
self.model = "base"
|
22 |
-
|
23 |
-
# Vocabulary
|
24 |
-
self.vocab_size = 16384
|
25 |
-
self.max_sequence_length = 512
|
26 |
-
|
27 |
-
# Model dimensions
|
28 |
-
self.hidden_size = 768
|
29 |
-
self.intermediate_size = 2048
|
30 |
-
self.num_attention_heads = 12
|
31 |
-
self.num_layers = 12
|
32 |
-
self.d_qk = 64
|
33 |
-
|
34 |
-
# Dropout probabilities
|
35 |
-
self.embedding_dropout_p = 0.1
|
36 |
-
self.attention_probabilities_dropout_p = 0.1
|
37 |
-
self.attention_output_dropout_p = 0.1
|
38 |
-
self.feed_forward_dropout_p = 0.1
|
39 |
-
self.attention_dropout = 0.1
|
40 |
-
self.hidden_dropout_prob = 0.2
|
41 |
-
|
42 |
-
# Position Embedding
|
43 |
-
self.rope_theta = 160_000
|
44 |
-
|
45 |
-
# Norms
|
46 |
-
self.word_norm_eps = 1e-7
|
47 |
-
self.word_norm_affine = False
|
48 |
-
|
49 |
-
self.attention_pre_norm_eps = 1e-7
|
50 |
-
self.attention_pre_norm_affine = False
|
51 |
-
|
52 |
-
self.attention_inter_norm_eps = 1e-7
|
53 |
-
self.attention_inter_norm_affine = True
|
54 |
-
|
55 |
-
self.feed_forward_pre_norm_eps = 1e-7
|
56 |
-
self.feed_forward_pre_norm_affine = False
|
57 |
-
|
58 |
-
self.feed_forward_inter_norm_eps = 1e-7
|
59 |
-
self.feed_forward_inter_norm_affine = False
|
60 |
-
|
61 |
-
self.classifier_pre_norm_eps = 1e-7
|
62 |
-
self.classifier_pre_norm_affine = False
|
63 |
-
|
64 |
-
self.classifier_post_norm_eps = 1e-7
|
65 |
-
self.classifier_post_norm_affine = False
|
66 |
|
67 |
if config_file is not None:
|
68 |
if type(config_file) is str:
|
|
|
14 |
**kwargs
|
15 |
):
|
16 |
super().__init__(**kwargs)
|
17 |
+
self.model = "norbert4"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
if config_file is not None:
|
20 |
if type(config_file) is str:
|