davda54 committed · Commit 42ee330 · verified · 1 Parent(s): a8b05e0

FlashAttention support

Files changed (1)
  1. configuration_gptbert.py +1 -49
configuration_gptbert.py CHANGED
@@ -14,55 +14,7 @@ class GptBertConfig(PretrainedConfig):
         **kwargs
     ):
         super().__init__(**kwargs)
-
-        self.model: str
-
-        # General information
-        self.model = "base"
-
-        # Vocabulary
-        self.vocab_size = 16384
-        self.max_sequence_length = 512
-
-        # Model dimensions
-        self.hidden_size = 768
-        self.intermediate_size = 2048
-        self.num_attention_heads = 12
-        self.num_layers = 12
-        self.d_qk = 64
-
-        # Dropout probabilities
-        self.embedding_dropout_p = 0.1
-        self.attention_probabilities_dropout_p = 0.1
-        self.attention_output_dropout_p = 0.1
-        self.feed_forward_dropout_p = 0.1
-        self.attention_dropout = 0.1
-        self.hidden_dropout_prob = 0.2
-
-        # Position Emebedding
-        self.rope_theta = 160_000
-
-        # Norms
-        self.word_norm_eps = 1e-7
-        self.word_norm_affine = False
-
-        self.attention_pre_norm_eps = 1e-7
-        self.attention_pre_norm_affine = False
-
-        self.attention_inter_norm_eps = 1e-7
-        self.attention_inter_norm_affine = True
-
-        self.feed_forward_pre_norm_eps = 1e-7
-        self.feed_forward_pre_norm_affine = False
-
-        self.feed_forward_inter_norm_eps = 1e-7
-        self.feed_forward_inter_norm_affine = False
-
-        self.classifier_pre_norm_eps = 1e-7
-        self.classifier_pre_norm_affine = False
-
-        self.classifier_post_norm_eps = 1e-7
-        self.classifier_post_norm_affine = False
+        self.model = "norbert4"
 
         if config_file is not None:
             if type(config_file) is str:
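
With the hard-coded defaults removed, `__init__` only sets `model = "norbert4"` directly; the remaining values presumably come from the config file passed to the class or shipped with the checkpoint. A minimal usage sketch, assuming the config is loaded through the standard `transformers` remote-code path (the repository name below is a placeholder, not taken from this commit):

    from transformers import AutoConfig

    # Hypothetical checkpoint name; substitute the actual norbert4 repository.
    config = AutoConfig.from_pretrained("ltg/norbert4-base", trust_remote_code=True)

    # After this commit, only `model` is assigned directly in __init__;
    # fields such as hidden_size, num_layers, and rope_theta are expected
    # to be read from the checkpoint's JSON config instead.
    print(config.model)
    print(config.hidden_size)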