Team Finetuner committed
Commit · 344bcbc
Parent(s): 5ee2c37

chore: update from afe81ca705ca1a5bd6b7d90548fcac068850b2af

Files changed:
- configuration_bert.py +1 -7
- modeling_bert.py +0 -3
configuration_bert.py
CHANGED
@@ -84,14 +84,10 @@ class JinaBertConfig(PretrainedConfig):
         emb_pooler (`str`, *optional*, defaults to `None`):
             The function to use for pooling the last layer embeddings to get the sentence embeddings.
             Should be one of `None`, `"mean"`.
-
-            Whether to use triton flash attention. Only works for `triton==2.0.0.dev20230208`.
-            This argument will be deprecated in the future. Use `attention_implementation` instead.
-        attn_implementation (`str`, *optional*, defaults to `None`):
+        attn_implementation (`str`, *optional*, defaults to `"torch"`):
             The implementation of the self-attention layer. Can be one of:
             - `None` for the original implementation,
             - `torch` for the PyTorch SDPA implementation,
-            - `triton` for the Triton Flash implementation. Only works for `triton==2.0.0.dev20230208`
 
         Examples:
 
@@ -132,7 +128,6 @@ class JinaBertConfig(PretrainedConfig):
         classifier_dropout=None,
         feed_forward_type="original",
         emb_pooler=None,
-        with_flash=False,
         attn_implementation='torch',
         **kwargs,
     ):
@@ -156,7 +151,6 @@ class JinaBertConfig(PretrainedConfig):
         self.feed_forward_type = feed_forward_type
         self.emb_pooler = emb_pooler
         self.attn_implementation = attn_implementation
-        self.with_flash = with_flash
 
 class JinaBertOnnxConfig(OnnxConfig):
     @property
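Taken together, the change leaves `attn_implementation` (now defaulting to `'torch'`) as the only attention switch in `JinaBertConfig`; `with_flash` disappears from the docstring, the `__init__` signature, and the stored attributes. Below is a minimal usage sketch, assuming the patched `configuration_bert.py` above is importable as a local module; the argument values are limited to the options documented in the diff.

```python
# Minimal sketch, assuming the patched configuration_bert.py is on the Python path.
from configuration_bert import JinaBertConfig

config = JinaBertConfig(
    emb_pooler="mean",            # documented choices: None or "mean"
    attn_implementation="torch",  # None -> original attention, "torch" -> PyTorch SDPA
)
print(config.attn_implementation)  # 'torch'
```

After this commit, passing `with_flash` no longer selects an attention backend; it is no longer a recognized argument of `JinaBertConfig.__init__` and simply falls through to `**kwargs`.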
modeling_bert.py
CHANGED
@@ -273,9 +273,6 @@ class JinaBertSelfAttention(nn.Module):
         )
 
         self.attn_implementation = config.attn_implementation
-        if config.with_flash:
-            self.attn_implementation = 'triton'
-
         self.num_attention_heads = config.num_attention_heads
         self.attention_head_size = int(config.hidden_size / config.num_attention_heads)
         self.all_head_size = self.num_attention_heads * self.attention_head_size