Spaces:
Paused
Paused
| import pytest | |
| from typing import Any | |
| from huggingface_hub import snapshot_download | |
| from langchain.embeddings import HuggingFaceInstructEmbeddings | |
| from langchain.vectorstores import FAISS | |
# Runs at import time (i.e. during pytest collection): fetch only the
# serialized index files (*.faiss / *.pkl) from the Hub dataset repository
# into the local ``indexes/`` directory that the tests below load from.
snapshot_download(
    repo_id='KonradSzafer/index-large-notebooks',
    repo_type='dataset',
    allow_patterns=['*.faiss', '*.pkl'],
    local_dir='indexes/',
)
@pytest.fixture(scope='module')
def embedding_model() -> HuggingFaceInstructEmbeddings:
    """Provide the instructor embedding model used to embed queries.

    Registered as a pytest fixture so that functions in this module which
    declare an ``embedding_model`` parameter receive this object by name;
    without the decorator pytest reports "fixture 'embedding_model' not
    found". Module scope means the model is constructed once per test
    module instead of once per requesting test.

    Returns:
        A ``HuggingFaceInstructEmbeddings`` configured for the
        ``hkunlp/instructor-large`` model with the documentation-specific
        embed and query instructions.
    """
    return HuggingFaceInstructEmbeddings(
        model_name='hkunlp/instructor-large',
        embed_instruction='Represent the Hugging Face library documentation',
        query_instruction='Query the most relevant piece of information from the Hugging Face documentation',
    )
@pytest.fixture(scope='module')
def index_path() -> str:
    """Provide the directory that holds the serialized index files.

    Registered as a pytest fixture so that functions in this module which
    declare an ``index_path`` parameter receive this value by name; without
    the decorator pytest reports "fixture 'index_path' not found".

    Returns:
        The relative path ``'indexes/'``, the same directory the module-level
        ``snapshot_download`` call uses as ``local_dir``.
    """
    return 'indexes/'
@pytest.fixture(scope='module')
def index(embedding_model: HuggingFaceInstructEmbeddings, index_path: str) -> FAISS:
    """Provide the FAISS vector store loaded from ``index_path``.

    Registered as a pytest fixture so that tests declaring an ``index``
    parameter receive the loaded store; without the decorator pytest reports
    "fixture 'index' not found". Because the fixture is module-scoped, the
    fixtures it requests must be module-scoped or broader.

    Args:
        embedding_model: Embedding model passed to ``FAISS.load_local``.
        index_path: Directory containing the serialized index files.

    Returns:
        The result of ``FAISS.load_local(index_path, embedding_model)``.
    """
    return FAISS.load_local(index_path, embedding_model)
@pytest.fixture(scope='module')
def query() -> str:
    """Provide the sample question used for similarity searches.

    Registered as a pytest fixture so that tests declaring a ``query``
    parameter receive this string by name; without the decorator pytest
    reports "fixture 'query' not found".

    Returns:
        The fixed query text ``'How to use the tokenizer?'``.
    """
    return 'How to use the tokenizer?'
def test_load_index(embedding_model: HuggingFaceInstructEmbeddings, index_path: str):
    """Check that the index stored at ``index_path`` can be loaded.

    Loads the store directly with ``FAISS.load_local`` and asserts that the
    call produced an object rather than ``None``.
    """
    loaded_store = FAISS.load_local(index_path, embedding_model)
    assert loaded_store is not None, 'Failed to load index'
def test_index_page_content(index, query: str):
    """Check that a similarity search yields a document with string content.

    Args:
        index: Vector store exposing ``similarity_search(query=..., k=...)``.
        query: Text to search the index with.
    """
    query_docs = index.similarity_search(query=query, k=3)
    # Guard first so an empty result fails with a readable assertion message
    # instead of an IndexError from the subscript below.
    assert query_docs, 'Similarity search returned no documents'
    assert isinstance(query_docs[0].page_content, str), (
        'page_content of the first retrieved document is not a string'
    )
def test_index_metadata(index, query: str):
    """Check that a retrieved document carries a string ``source`` in its metadata.

    Args:
        index: Vector store exposing ``similarity_search(query=..., k=...)``.
        query: Text to search the index with.
    """
    query_docs = index.similarity_search(query=query, k=3)
    # Guard first so an empty result fails with a readable assertion message
    # instead of an IndexError from the subscript below.
    assert query_docs, 'Similarity search returned no documents'
    first_metadata = query_docs[0].metadata
    # Report a missing key as an assertion failure rather than a bare KeyError.
    assert 'source' in first_metadata, (
        "Metadata of the first retrieved document has no 'source' key"
    )
    assert isinstance(first_metadata['source'], str), (
        "Metadata 'source' of the first retrieved document is not a string"
    )