medmediani committed on
Commit
c9bc296
·
1 Parent(s): 03e9f5b

Changed the model path

Browse files
.ipynb_checkpoints/kwextractor-checkpoint.py CHANGED
@@ -39,7 +39,15 @@ class KeyWordExtractor():
39
  stop_words=None)
40
  )
41
  print("KWS=",kws,file=sys.stderr)
42
- return sorted(kws, key=lambda x: x[1],reverse=True)[:nkws]
 
 
 
 
 
 
 
 
43
 
44
  def extract(self, ctxt, nkws=None, max_kw_ngs=None):
45
  nkws= nkws if nkws is not None else self.NKW
@@ -47,5 +55,6 @@ class KeyWordExtractor():
47
 
48
  #Since we are taking only 512 tokens, let's do by paragraph
49
  kw=self._extract_by_paragraph(ctxt,nkws,max_kw_ngs)
 
50
  return ", ".join(w for w,_ in kw)
51
 
 
39
  stop_words=None)
40
  )
41
  print("KWS=",kws,file=sys.stderr)
42
+ kws.sort(key=lambda x: x[1],reverse=True)
43
+ ukws=set()
44
+ for kw,_ in kws:
45
+
46
+ if len(ukws)>=nkws:
47
+ return ukws
48
+ ukws.add(kw)
49
+
50
+ return ukws
51
 
52
  def extract(self, ctxt, nkws=None, max_kw_ngs=None):
53
  nkws= nkws if nkws is not None else self.NKW
 
55
 
56
  #Since we are taking only 512 tokens, let's do by paragraph
57
  kw=self._extract_by_paragraph(ctxt,nkws,max_kw_ngs)
58
+ return ", ".join(kw)
59
  return ", ".join(w for w,_ in kw)
60
 
kwextractor.py CHANGED
@@ -39,7 +39,15 @@ class KeyWordExtractor():
39
  stop_words=None)
40
  )
41
  print("KWS=",kws,file=sys.stderr)
42
- return sorted(kws, key=lambda x: x[1],reverse=True)[:nkws]
 
 
 
 
 
 
 
 
43
 
44
  def extract(self, ctxt, nkws=None, max_kw_ngs=None):
45
  nkws= nkws if nkws is not None else self.NKW
@@ -47,5 +55,6 @@ class KeyWordExtractor():
47
 
48
  #Since we are taking only 512 tokens, let's do by paragraph
49
  kw=self._extract_by_paragraph(ctxt,nkws,max_kw_ngs)
 
50
  return ", ".join(w for w,_ in kw)
51
 
 
39
  stop_words=None)
40
  )
41
  print("KWS=",kws,file=sys.stderr)
42
+ kws.sort(key=lambda x: x[1],reverse=True)
43
+ ukws=set()
44
+ for kw,_ in kws:
45
+
46
+ if len(ukws)>=nkws:
47
+ return ukws
48
+ ukws.add(kw)
49
+
50
+ return ukws
51
 
52
  def extract(self, ctxt, nkws=None, max_kw_ngs=None):
53
  nkws= nkws if nkws is not None else self.NKW
 
55
 
56
  #Since we are taking only 512 tokens, let's do by paragraph
57
  kw=self._extract_by_paragraph(ctxt,nkws,max_kw_ngs)
58
+ return ", ".join(kw)
59
  return ", ".join(w for w,_ in kw)
60