merge changes
Signed-off-by: Meow <[email protected]>
- embedding.py +0 -2
- mha.py +1 -0
- modeling_xlm_roberta.py +0 -1
embedding.py CHANGED

@@ -59,7 +59,6 @@ class XLMRobertaEmbeddings(nn.Module):
             embeddings[task_indices] = task_embeddings
         else:
             embeddings = self.word_embeddings(input_ids)
-
         if self.max_position_embeddings > 0:
             if position_ids is None:
                 position_ids = create_position_ids_from_input_ids(input_ids, padding_idx=self.word_embeddings.padding_idx).to(input_ids.device)
@@ -79,5 +78,4 @@ class XLMRobertaEmbeddings(nn.Module):
             else:
                 token_type_embeddings = self.token_type_embeddings(token_type_ids)
                 embeddings = embeddings + token_type_embeddings
-
         return embeddings
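The context lines above show the adapter-aware lookup: when a task mask is present, the rows belonging to that task are overwritten with task-specific embeddings, otherwise the plain word-embedding lookup is used. Below is a minimal, self-contained sketch of that selection pattern; the `adapter_mask` tensor and the reuse of `word_embeddings` as the per-task table are assumptions for illustration, not the module's actual code.

import torch
import torch.nn as nn

# Sketch of the pattern visible in the hunk above (assumed names except
# `word_embeddings`, `task_indices`, `task_embeddings`, `embeddings`).
word_embeddings = nn.Embedding(250002, 768, padding_idx=1)
input_ids = torch.randint(2, 250002, (4, 16))      # (batch, seqlen)
adapter_mask = torch.tensor([0, 1, 1, 0])           # hypothetical per-sample task ids

embeddings = word_embeddings(input_ids)
for task_id in adapter_mask.unique():
    task_indices = (adapter_mask == task_id).nonzero(as_tuple=True)[0]
    # A real module would use a task-specific embedding table here; reusing
    # word_embeddings just keeps the sketch runnable on its own.
    task_embeddings = word_embeddings(input_ids[task_indices])
    embeddings[task_indices] = task_embeddings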
mha.py CHANGED

@@ -643,6 +643,7 @@ class MHA(nn.Module):
             inference_params.max_sequence_len if inference_params is not None else max_seqlen
         )
         batch, seqlen = x.shape[:2]
+        lora_kwargs = {}
         if not self.cross_attn and self.num_heads_kv == self.num_heads:
             assert x_kv is None and mixer_subset is None
 
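The only functional change in this file is the new `lora_kwargs = {}` initialisation before the projection branches. A minimal sketch of the usual pattern, conditionally populating the dict and splatting it into a LoRA-capable projection call, is below; the `adapter_mask` field and the `TinyProjection` stand-in are assumptions, not this repository's API.

import torch
import torch.nn as nn

class TinyProjection(nn.Module):
    """Stand-in for a LoRA-capable projection that accepts extra keyword args."""
    def __init__(self, dim: int):
        super().__init__()
        self.linear = nn.Linear(dim, 3 * dim)

    def forward(self, x, adapter_mask=None):
        # A real LoRA projection would pick per-task low-rank updates from adapter_mask.
        return self.linear(x)

dim = 64
proj = TinyProjection(dim)
x = torch.randn(2, 8, dim)              # (batch, seqlen, dim)
adapter_mask = torch.tensor([0, 1])     # hypothetical per-sample task ids

# Pattern sketched by the hunk: start from an empty kwargs dict and only
# populate it when adapter information is available.
lora_kwargs = {}
if adapter_mask is not None:
    lora_kwargs["adapter_mask"] = adapter_mask

qkv = proj(x, **lora_kwargs)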
modeling_xlm_roberta.py CHANGED

@@ -213,7 +213,6 @@ class XLMRobertaEncoder(nn.Module):
             mixer_kwargs = {'adapter_mask': adapter_mask}
             if key_padding_mask is not None:
                 mixer_kwargs['key_padding_mask'] = key_padding_mask.bool()
-
             for layer in self.layers:
                 if self._grad_checkpointing:
                     hidden_states = torch.utils.checkpoint.checkpoint(
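The surrounding lines assemble `mixer_kwargs` (adapter mask plus an optional key-padding mask) and hand it to every encoder layer, routing through activation checkpointing when gradient checkpointing is enabled. A minimal sketch of that dispatch loop with a stub block is below; `StubBlock` and the concrete tensor shapes are assumptions for illustration, not the encoder's real layer class.

import torch
import torch.nn as nn
import torch.utils.checkpoint

class StubBlock(nn.Module):
    """Stand-in for an encoder block that accepts mixer_kwargs."""
    def __init__(self, dim: int):
        super().__init__()
        self.proj = nn.Linear(dim, dim)

    def forward(self, hidden_states, mixer_kwargs=None):
        # A real block would forward mixer_kwargs (adapter_mask, key_padding_mask)
        # to its attention module.
        return self.proj(hidden_states)

layers = nn.ModuleList([StubBlock(32) for _ in range(2)])
hidden_states = torch.randn(2, 8, 32, requires_grad=True)
mixer_kwargs = {"adapter_mask": torch.tensor([0, 1])}
grad_checkpointing = True

for layer in layers:
    if grad_checkpointing:
        # Non-reentrant checkpointing supports keyword arguments.
        hidden_states = torch.utils.checkpoint.checkpoint(
            layer, hidden_states, mixer_kwargs=mixer_kwargs, use_reentrant=False
        )
    else:
        hidden_states = layer(hidden_states, mixer_kwargs=mixer_kwargs)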