mes-chatbot-rag-backend / test_retriever.py
christian
Remove big files for HF
402e33f
# from utils.vector_store import get_vector_store
# def test_retriever(query: str, k: int = 3, vector_store_path="./chroma_db"):
# """Test retriever with specific vector store"""
# vector_store = get_vector_store(persist_directory=vector_store_path)
# retriever = vector_store.as_retriever(search_kwargs={"k": k})
# docs = retriever.get_relevant_documents(query)
# # Deduplicate based on page_content
# seen = set()
# unique_docs = []
# for doc in docs:
# if doc.page_content not in seen:
# seen.add(doc.page_content)
# unique_docs.append(doc)
# print(f"\nUsing vector store: {vector_store_path}")
# print(f"Top {len(unique_docs)} unique chunks retrieved for: '{query}'\n")
# for i, doc in enumerate(unique_docs, 1):
# source = doc.metadata.get("source", "unknown")
# page = doc.metadata.get("page", "N/A")
# print(f"--- Chunk #{i} ---")
# print(f"Source: {source} | Page: {page}")
# preview = doc.page_content[:300]
# if len(doc.page_content) > 300:
# preview += "..."
# print(preview)
# print()
# def compare_retrievers(query: str, k: int = 3):
# """Compare results from different vector stores"""
# stores = {
# "MES Manual": "./vector_stores/mes_db",
# "Technical Docs": "./vector_stores/tech_db",
# "General Docs": "./vector_stores/general_db"
# }
# print(f"\n=== Comparing retrievers for: '{query}' ===\n")
# for store_name, store_path in stores.items():
# try:
# print(f"🔍 {store_name}:")
# print("-" * 50)
# test_retriever(query, k=k, vector_store_path=store_path)
# print("\n" + "="*60 + "\n")
# except Exception as e:
# print(f"❌ Could not access {store_name}: {e}\n")
# if __name__ == "__main__":
# print("Multi-Vector Store RAG Tester")
# print("\nAvailable commands:")
# print(" - Enter a question to test default store")
# print(" - Type 'mes: <question>' for MES manual")
# print(" - Type 'tech: <question>' for technical docs")
# print(" - Type 'general: <question>' for general docs")
# print(" - Type 'compare: <question>' to compare all stores")
# print(" - Type 'exit' to quit")
# while True:
# user_input = input("\nEnter your question: ").strip()
# if user_input.lower() == "exit":
# break
# elif user_input.lower().startswith("mes: "):
# query = user_input[5:]
# test_retriever(query, vector_store_path="./vector_stores/mes_db")
# elif user_input.lower().startswith("tech: "):
# query = user_input[6:]
# test_retriever(query, vector_store_path="./vector_stores/tech_db")
# elif user_input.lower().startswith("general: "):
# query = user_input[9:]
# test_retriever(query, vector_store_path="./vector_stores/general_db")
# elif user_input.lower().startswith("compare: "):
# query = user_input[9:]
# compare_retrievers(query)
# else:
# test_retriever(user_input) # Default store
from utils.vector_store import get_vector_store
def test_retriever(query: str, k: int = 3, vector_store_path="./chroma_db"):
    """Query one persisted vector store and print the chunks it returns.

    Args:
        query: Text handed to the retriever.
        k: Value passed to the retriever as ``search_kwargs={"k": k}``.
        vector_store_path: Directory passed to ``get_vector_store`` as
            ``persist_directory``.

    Documents whose ``page_content`` repeats an earlier result are dropped
    before printing. For each remaining document the ``source`` and ``page``
    metadata entries are shown, followed by a preview of at most 300
    characters. Nothing is returned; all output goes to stdout.
    """
    store = get_vector_store(persist_directory=vector_store_path)
    hits = (
        store
        .as_retriever(search_kwargs={"k": k})
        .get_relevant_documents(query)
    )

    # Keep the first document seen for each distinct text. A dict preserves
    # insertion order, so the retriever's ranking is retained.
    first_by_text = {}
    for hit in hits:
        first_by_text.setdefault(hit.page_content, hit)
    unique_docs = list(first_by_text.values())

    print(f"\nUsing vector store: {vector_store_path}")
    print(f"Top {len(unique_docs)} unique chunks retrieved for: '{query}'\n")

    preview_limit = 300
    for rank, chunk in enumerate(unique_docs, start=1):
        meta = chunk.metadata
        source = meta.get("source", "unknown")
        page = meta.get("page", "N/A")
        print(f"--- Chunk #{rank} ---")
        print(f"Source: {source} | Page: {page}")

        text = chunk.page_content
        # Mark truncated previews with an ellipsis.
        suffix = "..." if len(text) > preview_limit else ""
        print(text[:preview_limit] + suffix)
        print()
def compare_retrievers(query: str, k: int = 3):
    """Run one query against each known vector store and print the results.

    Args:
        query: Text forwarded to ``test_retriever`` for every store.
        k: Forwarded to ``test_retriever`` as its ``k`` argument.

    The stores are the three hard-coded directories below. A failure while
    querying one store is reported on stdout and does not stop the remaining
    stores from being tried. Nothing is returned.
    """
    stores = {
        "MES Manual": "./vector_stores/mes_db",
        "Technical Docs": "./vector_stores/tech_db",
        "General Docs": "./vector_stores/general_db",
    }

    print(f"\n=== Comparing retrievers for: '{query}' ===\n")

    for store_name, store_path in stores.items():
        try:
            # The emoji were previously mis-encoded (mojibake) in these
            # user-facing strings; they are now the intended characters.
            print(f"🔍 {store_name}:")
            print("-" * 50)
            test_retriever(query, k=k, vector_store_path=store_path)
            print("\n" + "=" * 60 + "\n")
        except Exception as e:
            # Deliberately broad: this is a best-effort comparison, so any
            # problem with one store is reported and the loop moves on.
            print(f"❌ Could not access {store_name}: {e}\n")
if __name__ == "__main__":
    # Interactive tester: read questions from stdin and route each one to a
    # vector store according to an optional command prefix.
    print("Multi-Vector Store RAG Tester")
    print("\nAvailable commands:")
    print(" - Enter a question to test default store")
    print(" - Type 'mes: <question>' for MES manual")
    print(" - Type 'tech: <question>' for technical docs")
    print(" - Type 'general: <question>' for general docs")
    print(" - Type 'compare: <question>' to compare all stores")
    print(" - Type 'exit' to quit")

    # Command prefix -> persisted store queried by that command.
    store_by_prefix = {
        "mes: ": "./vector_stores/mes_db",
        "tech: ": "./vector_stores/tech_db",
        "general: ": "./vector_stores/general_db",
    }
    compare_prefix = "compare: "

    while True:
        try:
            user_input = input("\nEnter your question: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or Ctrl-C at the prompt ends the session the same
            # way 'exit' does, instead of dumping a traceback.
            print()
            break

        if not user_input:
            # Nothing to search for; do not send an empty query to the store.
            continue

        # Prefixes are matched case-insensitively, but the query itself is
        # sliced from the original text so its casing is preserved.
        lowered = user_input.lower()
        if lowered == "exit":
            break

        if lowered.startswith(compare_prefix):
            compare_retrievers(user_input[len(compare_prefix):])
            continue

        for prefix, store_path in store_by_prefix.items():
            if lowered.startswith(prefix):
                test_retriever(
                    user_input[len(prefix):],
                    vector_store_path=store_path,
                )
                break
        else:
            # No recognised prefix: query the default store.
            test_retriever(user_input)