KoichiYasuoka committed on
Commit
8871684
·
1 Parent(s): a9e6148

TransformersUD improved

Browse files
Files changed (1) hide show
  1. README.md +8 -7
README.md CHANGED
@@ -65,16 +65,17 @@ class TransformersUD(object):
65
  w=[(t["start"],t["end"],t["entity_group"]) for t in self.deprel(text)]
66
  z,n={t["start"]:t["entity"].split("|") for t in self.tagger(text)},len(w)
67
  r,m=[text[s:e] for s,e,p in w],numpy.full((n+1,n+1),numpy.nan)
68
- v=self.tokenizer(r,add_special_tokens=False)["input_ids"]
69
  for i,t in enumerate(v):
70
  q=[self.tokenizer.cls_token_id]+t+[self.tokenizer.sep_token_id]
71
- c=[q]+v[0:i]+[[self.tokenizer.mask_token_id]]+v[i+1:]+[[q[-1]]]
72
- b=[len(sum(c[0:j+1],[])) for j in range(len(c))]
73
- d=self.model(input_ids=torch.tensor([sum(c,[])]),
74
- token_type_ids=torch.tensor([[0]*b[0]+[1]*(b[-1]-b[0])]))
75
- s,e=d.start_logits.tolist()[0],d.end_logits.tolist()[0]
 
76
  for j in range(n):
77
- m[i+1,0 if i==j else j+1]=s[b[j]]+e[b[j+1]-1]
78
  i=numpy.nanargmax(m[:,0])
79
  m[0:i,0]=m[i+1:,0]=numpy.nan
80
  h=ufal.chu_liu_edmonds.chu_liu_edmonds(m)[0]
 
65
  w=[(t["start"],t["end"],t["entity_group"]) for t in self.deprel(text)]
66
  z,n={t["start"]:t["entity"].split("|") for t in self.tagger(text)},len(w)
67
  r,m=[text[s:e] for s,e,p in w],numpy.full((n+1,n+1),numpy.nan)
68
+ v,c=self.tokenizer(r,add_special_tokens=False)["input_ids"],[]
69
  for i,t in enumerate(v):
70
  q=[self.tokenizer.cls_token_id]+t+[self.tokenizer.sep_token_id]
71
+ c.append([q]+v[0:i]+[[self.tokenizer.mask_token_id]]+v[i+1:]+[[q[-1]]])
72
+ b=[[len(sum(x[0:j+1],[])) for j in range(len(x))] for x in c]
73
+ d=self.model(input_ids=torch.tensor([sum(x,[]) for x in c]),
74
+ token_type_ids=torch.tensor([[0]*x[0]+[1]*(x[-1]-x[0]) for x in b]))
75
+ s,e=d.start_logits.tolist(),d.end_logits.tolist()
76
+ for i in range(n):
77
  for j in range(n):
78
+ m[i+1,0 if i==j else j+1]=s[i][b[i][j]]+e[i][b[i][j+1]-1]
79
  i=numpy.nanargmax(m[:,0])
80
  m[0:i,0]=m[i+1:,0]=numpy.nan
81
  h=ufal.chu_liu_edmonds.chu_liu_edmonds(m)[0]