File size: 6,298 Bytes
402e33f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
# from utils.vector_store import get_vector_store

# def test_retriever(query: str, k: int = 3, vector_store_path="./chroma_db"):
#     """Test retriever with specific vector store"""
#     vector_store = get_vector_store(persist_directory=vector_store_path)
#     retriever = vector_store.as_retriever(search_kwargs={"k": k})
#     docs = retriever.get_relevant_documents(query)

#     # Deduplicate based on page_content
#     seen = set()
#     unique_docs = []
#     for doc in docs:
#         if doc.page_content not in seen:
#             seen.add(doc.page_content)
#             unique_docs.append(doc)

#     print(f"\nUsing vector store: {vector_store_path}")
#     print(f"Top {len(unique_docs)} unique chunks retrieved for: '{query}'\n")

#     for i, doc in enumerate(unique_docs, 1):
#         source = doc.metadata.get("source", "unknown")
#         page = doc.metadata.get("page", "N/A")
#         print(f"--- Chunk #{i} ---")
#         print(f"Source: {source} | Page: {page}")
#         preview = doc.page_content[:300]
#         if len(doc.page_content) > 300:
#             preview += "..."
#         print(preview)
#         print()


# def compare_retrievers(query: str, k: int = 3):
#     """Compare results from different vector stores"""
#     stores = {
#         "MES Manual": "./vector_stores/mes_db",
#         "Technical Docs": "./vector_stores/tech_db",
#         "General Docs": "./vector_stores/general_db"
#     }

#     print(f"\n=== Comparing retrievers for: '{query}' ===\n")

#     for store_name, store_path in stores.items():
#         try:
#             print(f"🔍 {store_name}:")
#             print("-" * 50)
#             test_retriever(query, k=k, vector_store_path=store_path)
#             print("\n" + "="*60 + "\n")
#         except Exception as e:
#             print(f"❌ Could not access {store_name}: {e}\n")


# if __name__ == "__main__":
#     print("Multi-Vector Store RAG Tester")
#     print("\nAvailable commands:")
#     print("  - Enter a question to test default store")
#     print("  - Type 'mes: <question>' for MES manual")
#     print("  - Type 'tech: <question>' for technical docs")
#     print("  - Type 'general: <question>' for general docs")
#     print("  - Type 'compare: <question>' to compare all stores")
#     print("  - Type 'exit' to quit")

#     while True:
#         user_input = input("\nEnter your question: ").strip()

#         if user_input.lower() == "exit":
#             break
#         elif user_input.lower().startswith("mes: "):
#             query = user_input[5:]
#             test_retriever(query, vector_store_path="./vector_stores/mes_db")
#         elif user_input.lower().startswith("tech: "):
#             query = user_input[6:]
#             test_retriever(query, vector_store_path="./vector_stores/tech_db")
#         elif user_input.lower().startswith("general: "):
#             query = user_input[9:]
#             test_retriever(query, vector_store_path="./vector_stores/general_db")
#         elif user_input.lower().startswith("compare: "):
#             query = user_input[9:]
#             compare_retrievers(query)
#         else:
#             test_retriever(user_input)  # Default store


from utils.vector_store import get_vector_store


def test_retriever(query: str, k: int = 3, vector_store_path: str = "./chroma_db") -> None:
    """Query one vector store and print the unique chunks it returns.

    Args:
        query: Question text passed to the retriever.
        k: Number of chunks requested, forwarded as ``search_kwargs["k"]``.
        vector_store_path: Directory handed to ``get_vector_store`` as
            ``persist_directory``.

    Returns:
        None. Results are written to stdout: a header, then for each unique
        chunk its ``source`` / ``page`` metadata and at most the first 300
        characters of its text.
    """
    vector_store = get_vector_store(persist_directory=vector_store_path)
    retriever = vector_store.as_retriever(search_kwargs={"k": k})

    # NOTE(review): assumes a LangChain-style retriever. ``invoke`` is the
    # supported entry point there; ``get_relevant_documents`` is deprecated,
    # so it is only used as a fallback for older versions that lack
    # ``invoke``.
    if hasattr(retriever, "invoke"):
        docs = retriever.invoke(query)
    else:
        docs = retriever.get_relevant_documents(query)

    # Deduplicate on page_content, keeping the first occurrence so the
    # retriever's ordering is preserved.
    seen = set()
    unique_docs = []
    for doc in docs:
        if doc.page_content not in seen:
            seen.add(doc.page_content)
            unique_docs.append(doc)

    print(f"\nUsing vector store: {vector_store_path}")
    print(f"Top {len(unique_docs)} unique chunks retrieved for: '{query}'\n")

    for i, doc in enumerate(unique_docs, 1):
        source = doc.metadata.get("source", "unknown")
        page = doc.metadata.get("page", "N/A")
        print(f"--- Chunk #{i} ---")
        print(f"Source: {source} | Page: {page}")
        # Show only a 300-character preview; mark truncation with an ellipsis.
        preview = doc.page_content[:300]
        if len(doc.page_content) > 300:
            preview += "..."
        print(preview)
        print()


def compare_retrievers(query: str, k: int = 3):
    """Run the same query against every known vector store and print the results.

    Each store is queried through ``test_retriever``. A store that raises is
    reported on stdout and skipped, so the remaining stores are still
    compared.

    Args:
        query: Question text sent to each store.
        k: Number of chunks requested from each store.
    """
    stores = {
        "MES Manual": "./vector_stores/mes_db",
        "Technical Docs": "./vector_stores/tech_db",
        "General Docs": "./vector_stores/general_db",
    }

    print(f"\n=== Comparing retrievers for: '{query}' ===\n")

    for store_name, store_path in stores.items():
        try:
            # "\U0001F50D" is the magnifying-glass emoji. It is written as an
            # escape so the source stays ASCII and cannot be re-mangled by an
            # encoding round trip.
            print(f"\U0001F50D {store_name}:")
            print("-" * 50)
            test_retriever(query, k=k, vector_store_path=store_path)
            print("\n" + "=" * 60 + "\n")
        except Exception as e:
            # Deliberately broad: one unavailable store must not abort the
            # comparison of the others. "\u274c" is the cross-mark emoji.
            print(f"\u274c Could not access {store_name}: {e}\n")


if __name__ == "__main__":
    # Interactive loop: read a line, route it to a vector store by its
    # command prefix, and print the retrieved chunks.

    # Command prefix -> vector store directory. Prefixes are matched
    # case-insensitively and must include the space after the colon, as
    # shown in the help text below.
    store_prefixes = {
        "mes: ": "./vector_stores/mes_db",
        "tech: ": "./vector_stores/tech_db",
        "general: ": "./vector_stores/general_db",
    }
    compare_prefix = "compare: "

    print("Multi-Vector Store RAG Tester")
    print("\nAvailable commands:")
    print("  - Enter a question to test default store")
    print("  - Type 'mes: <question>' for MES manual")
    print("  - Type 'tech: <question>' for technical docs")
    print("  - Type 'general: <question>' for general docs")
    print("  - Type 'compare: <question>' to compare all stores")
    print("  - Type 'exit' to quit")

    while True:
        try:
            user_input = input("\nEnter your question: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-C at the prompt ends the session without a
            # traceback.
            print()
            break

        # Nothing typed: prompt again rather than sending an empty query.
        if not user_input:
            continue

        lowered = user_input.lower()
        if lowered == "exit":
            break

        if lowered.startswith(compare_prefix):
            # Slice by the prefix length instead of a hard-coded offset;
            # drop any extra spaces typed after the prefix.
            query = user_input[len(compare_prefix):].lstrip()
            compare_retrievers(query)
            continue

        for prefix, store_path in store_prefixes.items():
            if lowered.startswith(prefix):
                query = user_input[len(prefix):].lstrip()
                test_retriever(query, vector_store_path=store_path)
                break
        else:
            # No recognised prefix: query the default store.
            test_retriever(user_input)