Build BM25 index from documents and save to output directory
Browse files
app.py
CHANGED
|
@@ -363,17 +363,16 @@ class Hit(TypedDict):
|
|
| 363 |
|
| 364 |
demo: Optional[gr.Interface] = None # Assign your gradio demo to this variable
|
| 365 |
return_type = List[Hit]
|
|
|
|
|
|
|
| 366 |
bm25_index = BM25Index.build_from_documents(
|
| 367 |
documents=iter(sciq.corpus),
|
| 368 |
ndocs=12160,
|
| 369 |
show_progress_bar=True,
|
| 370 |
-
k1=0.9,
|
| 371 |
-
b=0.4,
|
| 372 |
)
|
| 373 |
bm25_index.save("output/bm25_index")
|
| 374 |
bm25_retriever = BM25Retriever(index_dir="output/bm25_index")
|
| 375 |
|
| 376 |
-
## YOUR_CODE_STARTS_HERE
|
| 377 |
def retrieve(query: str, topk: int = 10) -> return_type:
|
| 378 |
ranking = bm25_retriever.retrieve(query=query, topk=topk)
|
| 379 |
hits = []
|
|
|
|
| 363 |
|
| 364 |
demo: Optional[gr.Interface] = None # Assign your gradio demo to this variable
|
| 365 |
return_type = List[Hit]
|
| 366 |
+
|
| 367 |
+
## YOUR_CODE_STARTS_HERE
|
| 368 |
bm25_index = BM25Index.build_from_documents(
|
| 369 |
documents=iter(sciq.corpus),
|
| 370 |
ndocs=12160,
|
| 371 |
show_progress_bar=True,
|
|
|
|
|
|
|
| 372 |
)
|
| 373 |
bm25_index.save("output/bm25_index")
|
| 374 |
bm25_retriever = BM25Retriever(index_dir="output/bm25_index")
|
| 375 |
|
|
|
|
| 376 |
def retrieve(query: str, topk: int = 10) -> return_type:
|
| 377 |
ranking = bm25_retriever.retrieve(query=query, topk=topk)
|
| 378 |
hits = []
|