Spaces:
Sleeping
Sleeping
# from utils.vector_store import get_vector_store | |
# def test_retriever(query: str, k: int = 3, vector_store_path="./chroma_db"): | |
# """Test retriever with specific vector store""" | |
# vector_store = get_vector_store(persist_directory=vector_store_path) | |
# retriever = vector_store.as_retriever(search_kwargs={"k": k}) | |
# docs = retriever.get_relevant_documents(query) | |
# # Deduplicate based on page_content | |
# seen = set() | |
# unique_docs = [] | |
# for doc in docs: | |
# if doc.page_content not in seen: | |
# seen.add(doc.page_content) | |
# unique_docs.append(doc) | |
# print(f"\nUsing vector store: {vector_store_path}") | |
# print(f"Top {len(unique_docs)} unique chunks retrieved for: '{query}'\n") | |
# for i, doc in enumerate(unique_docs, 1): | |
# source = doc.metadata.get("source", "unknown") | |
# page = doc.metadata.get("page", "N/A") | |
# print(f"--- Chunk #{i} ---") | |
# print(f"Source: {source} | Page: {page}") | |
# preview = doc.page_content[:300] | |
# if len(doc.page_content) > 300: | |
# preview += "..." | |
# print(preview) | |
# print() | |
# def compare_retrievers(query: str, k: int = 3): | |
# """Compare results from different vector stores""" | |
# stores = { | |
# "MES Manual": "./vector_stores/mes_db", | |
# "Technical Docs": "./vector_stores/tech_db", | |
# "General Docs": "./vector_stores/general_db" | |
# } | |
# print(f"\n=== Comparing retrievers for: '{query}' ===\n") | |
# for store_name, store_path in stores.items(): | |
# try: | |
# print(f"π {store_name}:") | |
# print("-" * 50) | |
# test_retriever(query, k=k, vector_store_path=store_path) | |
# print("\n" + "="*60 + "\n") | |
# except Exception as e: | |
# print(f"β Could not access {store_name}: {e}\n") | |
# if __name__ == "__main__": | |
# print("Multi-Vector Store RAG Tester") | |
# print("\nAvailable commands:") | |
# print(" - Enter a question to test default store") | |
# print(" - Type 'mes: <question>' for MES manual") | |
# print(" - Type 'tech: <question>' for technical docs") | |
# print(" - Type 'general: <question>' for general docs") | |
# print(" - Type 'compare: <question>' to compare all stores") | |
# print(" - Type 'exit' to quit") | |
# while True: | |
# user_input = input("\nEnter your question: ").strip() | |
# if user_input.lower() == "exit": | |
# break | |
# elif user_input.lower().startswith("mes: "): | |
# query = user_input[5:] | |
# test_retriever(query, vector_store_path="./vector_stores/mes_db") | |
# elif user_input.lower().startswith("tech: "): | |
# query = user_input[6:] | |
# test_retriever(query, vector_store_path="./vector_stores/tech_db") | |
# elif user_input.lower().startswith("general: "): | |
# query = user_input[9:] | |
# test_retriever(query, vector_store_path="./vector_stores/general_db") | |
# elif user_input.lower().startswith("compare: "): | |
# query = user_input[9:] | |
# compare_retrievers(query) | |
# else: | |
# test_retriever(user_input) # Default store | |
from utils.vector_store import get_vector_store | |
def test_retriever(query: str, k: int = 3, vector_store_path="./chroma_db"):
    """Print the distinct chunks a vector store retrieves for ``query``.

    The store is opened with
    ``get_vector_store(persist_directory=vector_store_path)`` and queried
    through ``as_retriever(search_kwargs={"k": k})``.  Documents whose
    ``page_content`` repeats an earlier result are dropped; for each
    remaining document the ``source`` and ``page`` metadata entries and a
    preview of at most 300 characters are printed.

    Args:
        query: Text handed to the retriever.
        k: Value passed as ``search_kwargs["k"]``.
        vector_store_path: Persist directory handed to ``get_vector_store``.
    """
    store = get_vector_store(persist_directory=vector_store_path)
    hits = store.as_retriever(search_kwargs={"k": k}).get_relevant_documents(query)

    # Keep only the first document seen for each distinct text; a dict keeps
    # insertion order, so the retrieval order of the survivors is unchanged.
    first_by_text = {}
    for hit in hits:
        first_by_text.setdefault(hit.page_content, hit)
    distinct = list(first_by_text.values())

    print(f"\nUsing vector store: {vector_store_path}")
    print(f"Top {len(distinct)} unique chunks retrieved for: '{query}'\n")

    for number, hit in enumerate(distinct, start=1):
        meta = hit.metadata
        text = hit.page_content
        # Mark previews that were cut short with a trailing ellipsis.
        excerpt = text[:300] + ("..." if len(text) > 300 else "")

        print(f"--- Chunk #{number} ---")
        print(f"Source: {meta.get('source', 'unknown')} | Page: {meta.get('page', 'N/A')}")
        print(excerpt)
        print()
def compare_retrievers(query: str, k: int = 3):
    """Run ``test_retriever`` for ``query`` against each configured store.

    Each store's results are printed under its own heading.  Any exception
    raised while handling one store is reported on stdout and the loop moves
    on to the next store, so one unavailable store does not abort the
    comparison.

    Args:
        query: Text handed to ``test_retriever`` for every store.
        k: Forwarded to ``test_retriever`` as ``k``.
    """
    # (display label, persist directory) pairs, visited in this order.
    labelled_stores = (
        ("MES Manual", "./vector_stores/mes_db"),
        ("Technical Docs", "./vector_stores/tech_db"),
        ("General Docs", "./vector_stores/general_db"),
    )
    heading_rule = "-" * 50
    section_divider = "\n" + "=" * 60 + "\n"

    print(f"\n=== Comparing retrievers for: '{query}' ===\n")
    for label, path in labelled_stores:
        try:
            print(f"π {label}:")
            print(heading_rule)
            test_retriever(query, k=k, vector_store_path=path)
            print(section_divider)
        except Exception as err:
            print(f"β Could not access {label}: {err}\n")
if __name__ == "__main__":
    # Interactive console for querying the vector stores by hand.

    # Command prefix -> persist directory of the store that command targets.
    # Keeping the table in one place removes the hard-coded slice offsets the
    # per-command branches would otherwise need.
    store_commands = {
        "mes:": "./vector_stores/mes_db",
        "tech:": "./vector_stores/tech_db",
        "general:": "./vector_stores/general_db",
    }
    compare_command = "compare:"

    print("Multi-Vector Store RAG Tester")
    print("\nAvailable commands:")
    print(" - Enter a question to test default store")
    print(" - Type 'mes: <question>' for MES manual")
    print(" - Type 'tech: <question>' for technical docs")
    print(" - Type 'general: <question>' for general docs")
    print(" - Type 'compare: <question>' to compare all stores")
    print(" - Type 'exit' to quit")

    while True:
        try:
            user_input = input("\nEnter your question: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D, Ctrl-C or exhausted piped stdin: leave the loop quietly
            # instead of ending the session with a traceback.
            print()
            break

        # A blank line is not a question; do not send it to the default store.
        if not user_input:
            continue

        lowered = user_input.lower()
        if lowered == "exit":
            break

        # Match the prefix without requiring the space after the colon, so
        # "mes:foo" is routed to the MES store rather than falling through to
        # the default store with the prefix still embedded in the query.
        command = next(
            (
                prefix
                for prefix in (compare_command, *store_commands)
                if lowered.startswith(prefix)
            ),
            None,
        )
        if command is None:
            test_retriever(user_input)  # Default store
            continue

        query = user_input[len(command):].strip()
        if not query:
            print(f"Please enter a question after '{command}'")
            continue

        if command == compare_command:
            compare_retrievers(query)
        else:
            test_retriever(query, vector_store_path=store_commands[command])