cb1716pics committed on
Commit
6b4fbf8
·
verified ·
1 Parent(s): ece1395

Upload retrieval.py

Browse files
Files changed (1) hide show
  1. retrieval.py +3 -0
retrieval.py CHANGED
@@ -7,11 +7,14 @@ from data_processing import embedding_model
7
  from sentence_transformers import CrossEncoder
8
  from nltk.tokenize import word_tokenize
9
  import string
 
10
 
11
  reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")
12
 
13
  retrieved_docs = None
14
 
 
 
15
  # Tokenize the documents and remove punctuation
16
  def preprocess(doc):
17
  return [word.lower() for word in word_tokenize(doc) if word not in string.punctuation]
 
7
  from sentence_transformers import CrossEncoder
8
  from nltk.tokenize import word_tokenize
9
  import string
10
+ import nltk
11
 
12
  reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")
13
 
14
  retrieved_docs = None
15
 
16
+ nltk.download('punkt')
17
+
18
  # Tokenize the documents and remove punctuation
19
  def preprocess(doc):
20
  return [word.lower() for word in word_tokenize(doc) if word not in string.punctuation]