davda54 committed · Commit 42ee330 · verified · 1 Parent(s): a8b05e0

FlashAttention support

Files changed (1)
  1. configuration_gptbert.py +1 -49
configuration_gptbert.py CHANGED
@@ -14,55 +14,7 @@ class GptBertConfig(PretrainedConfig):
         **kwargs
     ):
         super().__init__(**kwargs)
-
-        self.model: str
-
-        # General information
-        self.model = "base"
-
-        # Vocabulary
-        self.vocab_size = 16384
-        self.max_sequence_length = 512
-
-        # Model dimensions
-        self.hidden_size = 768
-        self.intermediate_size = 2048
-        self.num_attention_heads = 12
-        self.num_layers = 12
-        self.d_qk = 64
-
-        # Dropout probabilities
-        self.embedding_dropout_p = 0.1
-        self.attention_probabilities_dropout_p = 0.1
-        self.attention_output_dropout_p = 0.1
-        self.feed_forward_dropout_p = 0.1
-        self.attention_dropout = 0.1
-        self.hidden_dropout_prob = 0.2
-
-        # Position Emebedding
-        self.rope_theta = 160_000
-
-        # Norms
-        self.word_norm_eps = 1e-7
-        self.word_norm_affine = False
-
-        self.attention_pre_norm_eps = 1e-7
-        self.attention_pre_norm_affine = False
-
-        self.attention_inter_norm_eps = 1e-7
-        self.attention_inter_norm_affine = True
-
-        self.feed_forward_pre_norm_eps = 1e-7
-        self.feed_forward_pre_norm_affine = False
-
-        self.feed_forward_inter_norm_eps = 1e-7
-        self.feed_forward_inter_norm_affine = False
-
-        self.classifier_pre_norm_eps = 1e-7
-        self.classifier_pre_norm_affine = False
-
-        self.classifier_post_norm_eps = 1e-7
-        self.classifier_post_norm_affine = False
+        self.model = "norbert4"
 
         if config_file is not None:
             if type(config_file) is str:
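
With the hard-coded defaults removed, `__init__` only sets `model = "norbert4"` directly; the remaining values presumably come from the config file passed to the class or shipped with the checkpoint. A minimal usage sketch, assuming the config is loaded through the standard `transformers` remote-code path (the repository name below is a placeholder, not taken from this commit):

    from transformers import AutoConfig

    # Hypothetical checkpoint name; substitute the actual norbert4 repository.
    config = AutoConfig.from_pretrained("ltg/norbert4-base", trust_remote_code=True)

    # After this commit, only `model` is assigned directly in __init__;
    # fields such as hidden_size, num_layers, and rope_theta are expected
    # to be read from the checkpoint's JSON config instead.
    print(config.model)
    print(config.hidden_size)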