anubhavg97
/

constbert-onnx

Feature Extraction

text-embeddings-inference

Model card Files Files and versions

ag-nexla committed on Jul 4

Commit

09fb7a6

·

1 Parent(s): 9cb83c5

updated onnx model

Files changed (2) hide show

model.onnx +2 -2
modeling.py +4 -5

model.onnx CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:503b7a157de83c5ae3fc63dac56be01bf724cdc4c7f2141febfbe2d65ce8468d
-size 436270743

 version https://git-lfs.github.com/spec/v1
+oid sha256:30dae9a99d07f56c103a09173deaa9f76f141976ca20dd8f7e5a5cce8152dee8
+size 436269030

modeling.py CHANGED Viewed

@@ -124,9 +124,7 @@ class ConstBERT(BertPreTrainedModel):
         # Q = self.query_project(Q) #(64, 128,8)
         # Q = Q.permute(0, 2, 1) #(64,8,128)
         Q = self.linear(Q)
-        # mask = torch.ones(Q.shape[0], Q.shape[1], device=self.device).unsqueeze(2).float()
-        mask = torch.tensor(self.mask(input_ids, skiplist=[]), device=self.device).unsqueeze(2).float()
         Q = Q * mask
         return torch.nn.functional.normalize(Q, p=2, dim=2)
@@ -165,8 +163,9 @@ class ConstBERT(BertPreTrainedModel):
         return D
     def mask(self, input_ids, skiplist):
-        mask = [[(x not in skiplist) and (x != self.pad_token) for x in d] for d in input_ids.cpu().tolist()]
-        return mask
     def query(self, *args, to_cpu=False, **kw_args):
         with torch.no_grad():

         # Q = self.query_project(Q) #(64, 128,8)
         # Q = Q.permute(0, 2, 1) #(64,8,128)
         Q = self.linear(Q)
+        mask = self.mask(input_ids, skiplist=[]).unsqueeze(2)
         Q = Q * mask
         return torch.nn.functional.normalize(Q, p=2, dim=2)
         return D
     def mask(self, input_ids, skiplist):
+        # For ONNX export and inference, skiplist should be empty
+        # Create mask: 1 where input_ids != pad_token, else 0
+        return (input_ids != self.pad_token).float()
     def query(self, *args, to_cpu=False, **kw_args):
         with torch.no_grad():