Spaces:
Sleeping
Sleeping
Upload retrieval.py
Browse files- retrieval.py +3 -0
retrieval.py
CHANGED
@@ -7,11 +7,14 @@ from data_processing import embedding_model
|
|
7 |
from sentence_transformers import CrossEncoder
|
8 |
from nltk.tokenize import word_tokenize
|
9 |
import string
|
|
|
10 |
|
11 |
reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")
|
12 |
|
13 |
retrieved_docs = None
|
14 |
|
|
|
|
|
15 |
# Tokenize the documents and remove punctuation
|
16 |
def preprocess(doc):
    """Tokenize *doc* into lowercase word tokens, dropping punctuation.

    Args:
        doc: Text to tokenize; passed unchanged to ``word_tokenize``.

    Returns:
        A list of lowercased tokens, excluding every token that is made up
        solely of characters from ``string.punctuation``.
    """
    # ``string.punctuation`` is a str, so ``token in string.punctuation`` is a
    # substring test: it only catches single marks and runs that happen to be
    # adjacent in that string. Checking character by character also removes
    # multi-character punctuation tokens (for example "...", "--", or the
    # two-character quote tokens the tokenizer can emit).
    punctuation_chars = set(string.punctuation)
    return [
        token.lower()
        for token in word_tokenize(doc)
        if not all(char in punctuation_chars for char in token)
    ]
|
|
|
7 |
from sentence_transformers import CrossEncoder
|
8 |
from nltk.tokenize import word_tokenize
|
9 |
import string
|
10 |
+
import nltk
|
11 |
|
12 |
reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")
|
13 |
|
14 |
retrieved_docs = None
|
15 |
|
16 |
+
# Fetch the tokenizer data used by word_tokenize at import time. Recent NLTK
# releases load the "punkt_tab" resource instead of the pickled "punkt" models,
# so request both. NOTE(review): download() is expected to return False rather
# than raise when a resource id is unknown to an older NLTK - confirm against
# the pinned NLTK version.
nltk.download('punkt')
nltk.download('punkt_tab')
|
17 |
+
|
18 |
# Tokenize the documents and remove punctuation
|
19 |
def preprocess(doc):
    """Tokenize *doc* into lowercase word tokens, dropping punctuation.

    Args:
        doc: Text to tokenize; passed unchanged to ``word_tokenize``.

    Returns:
        A list of lowercased tokens, excluding every token that is made up
        solely of characters from ``string.punctuation``.
    """
    # ``string.punctuation`` is a str, so ``token in string.punctuation`` is a
    # substring test: it only catches single marks and runs that happen to be
    # adjacent in that string. Checking character by character also removes
    # multi-character punctuation tokens (for example "...", "--", or the
    # two-character quote tokens the tokenizer can emit).
    punctuation_chars = set(string.punctuation)
    return [
        token.lower()
        for token in word_tokenize(doc)
        if not all(char in punctuation_chars for char in token)
    ]
|