Spaces:
Sleeping
Sleeping
medmediani
committed on
Commit
·
c9bc296
1
Parent(s):
03e9f5b
Changed the model path
Browse files
- .ipynb_checkpoints/kwextractor-checkpoint.py +10 -1
- kwextractor.py +10 -1
.ipynb_checkpoints/kwextractor-checkpoint.py
CHANGED
@@ -39,7 +39,15 @@ class KeyWordExtractor():
|
|
39 |
stop_words=None)
|
40 |
)
|
41 |
print("KWS=",kws,file=sys.stderr)
|
42 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
|
44 |
def extract(self, ctxt, nkws=None, max_kw_ngs=None):
|
45 |
nkws= nkws if nkws is not None else self.NKW
|
@@ -47,5 +55,6 @@ class KeyWordExtractor():
|
|
47 |
|
48 |
#Since we are taking only 512 tokens, let's do by paragraph
|
49 |
kw=self._extract_by_paragraph(ctxt,nkws,max_kw_ngs)
|
|
|
50 |
return ", ".join(w for w,_ in kw)
|
51 |
|
|
|
39 |
stop_words=None)
|
40 |
)
|
41 |
print("KWS=",kws,file=sys.stderr)
|
42 |
+
kws.sort(key=lambda x: x[1],reverse=True)
|
43 |
+
ukws=set()
|
44 |
+
for kw,_ in kws:
|
45 |
+
|
46 |
+
if len(ukws)>=nkws:
|
47 |
+
return ukws
|
48 |
+
ukws.add(kw)
|
49 |
+
|
50 |
+
return ukws
|
51 |
|
52 |
def extract(self, ctxt, nkws=None, max_kw_ngs=None):
|
53 |
nkws= nkws if nkws is not None else self.NKW
|
|
|
55 |
|
56 |
#Since we are taking only 512 tokens, let's do by paragraph
|
57 |
kw=self._extract_by_paragraph(ctxt,nkws,max_kw_ngs)
|
58 |
+
return ", ".join(kw)
|
59 |
return ", ".join(w for w,_ in kw)
|
60 |
|
kwextractor.py
CHANGED
@@ -39,7 +39,15 @@ class KeyWordExtractor():
|
|
39 |
stop_words=None)
|
40 |
)
|
41 |
print("KWS=",kws,file=sys.stderr)
|
42 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
|
44 |
def extract(self, ctxt, nkws=None, max_kw_ngs=None):
|
45 |
nkws= nkws if nkws is not None else self.NKW
|
@@ -47,5 +55,6 @@ class KeyWordExtractor():
|
|
47 |
|
48 |
#Since we are taking only 512 tokens, let's do by paragraph
|
49 |
kw=self._extract_by_paragraph(ctxt,nkws,max_kw_ngs)
|
|
|
50 |
return ", ".join(w for w,_ in kw)
|
51 |
|
|
|
39 |
stop_words=None)
|
40 |
)
|
41 |
print("KWS=",kws,file=sys.stderr)
|
42 |
+
kws.sort(key=lambda x: x[1],reverse=True)
|
43 |
+
ukws=set()
|
44 |
+
for kw,_ in kws:
|
45 |
+
|
46 |
+
if len(ukws)>=nkws:
|
47 |
+
return ukws
|
48 |
+
ukws.add(kw)
|
49 |
+
|
50 |
+
return ukws
|
51 |
|
52 |
def extract(self, ctxt, nkws=None, max_kw_ngs=None):
|
53 |
nkws= nkws if nkws is not None else self.NKW
|
|
|
55 |
|
56 |
#Since we are taking only 512 tokens, let's do by paragraph
|
57 |
kw=self._extract_by_paragraph(ctxt,nkws,max_kw_ngs)
|
58 |
+
return ", ".join(kw)
|
59 |
return ", ".join(w for w,_ in kw)
|
60 |
|