Commit
·
8871684
1
Parent(s):
a9e6148
TransformersUD improved
Browse files
README.md
CHANGED
@@ -65,16 +65,17 @@ class TransformersUD(object):
|
|
65 |
w=[(t["start"],t["end"],t["entity_group"]) for t in self.deprel(text)]
|
66 |
z,n={t["start"]:t["entity"].split("|") for t in self.tagger(text)},len(w)
|
67 |
r,m=[text[s:e] for s,e,p in w],numpy.full((n+1,n+1),numpy.nan)
|
68 |
-
v=self.tokenizer(r,add_special_tokens=False)["input_ids"]
|
69 |
for i,t in enumerate(v):
|
70 |
q=[self.tokenizer.cls_token_id]+t+[self.tokenizer.sep_token_id]
|
71 |
-
c
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
|
|
76 |
for j in range(n):
|
77 |
-
m[i+1,0 if i==j else j+1]=s[b[j]]+e[b[j+1]-1]
|
78 |
i=numpy.nanargmax(m[:,0])
|
79 |
m[0:i,0]=m[i+1:,0]=numpy.nan
|
80 |
h=ufal.chu_liu_edmonds.chu_liu_edmonds(m)[0]
|
|
|
65 |
w=[(t["start"],t["end"],t["entity_group"]) for t in self.deprel(text)]
|
66 |
z,n={t["start"]:t["entity"].split("|") for t in self.tagger(text)},len(w)
|
67 |
r,m=[text[s:e] for s,e,p in w],numpy.full((n+1,n+1),numpy.nan)
|
68 |
+
v,c=self.tokenizer(r,add_special_tokens=False)["input_ids"],[]
|
69 |
for i,t in enumerate(v):
|
70 |
q=[self.tokenizer.cls_token_id]+t+[self.tokenizer.sep_token_id]
|
71 |
+
c.append([q]+v[0:i]+[[self.tokenizer.mask_token_id]]+v[i+1:]+[[q[-1]]])
|
72 |
+
b=[[len(sum(x[0:j+1],[])) for j in range(len(x))] for x in c]
|
73 |
+
d=self.model(input_ids=torch.tensor([sum(x,[]) for x in c]),
|
74 |
+
token_type_ids=torch.tensor([[0]*x[0]+[1]*(x[-1]-x[0]) for x in b]))
|
75 |
+
s,e=d.start_logits.tolist(),d.end_logits.tolist()
|
76 |
+
for i in range(n):
|
77 |
for j in range(n):
|
78 |
+
m[i+1,0 if i==j else j+1]=s[i][b[i][j]]+e[i][b[i][j+1]-1]
|
79 |
i=numpy.nanargmax(m[:,0])
|
80 |
m[0:i,0]=m[i+1:,0]=numpy.nan
|
81 |
h=ufal.chu_liu_edmonds.chu_liu_edmonds(m)[0]
|