Spaces:

gourisankar85
/

TalkToYourDocument

Running

gourisankar85 committed on Apr 10

Commit

ecc9585

verified ·

1 Parent(s): c737bb6

Upload 4 files

Files changed (3) hide show

retriever/document_manager.py CHANGED Viewed

@@ -43,7 +43,7 @@ class DocumentManager:
             self.document_ids[filename] = doc_id
             # Chunk the pages
-            chunks = chunk_documents(page_list, doc_id, chunk_size=1000, chunk_overlap=200)
             self.chunked_documents[filename] = chunks
             # Add chunks to vector store

             self.document_ids[filename] = doc_id
             # Chunk the pages
+            chunks = chunk_documents(page_list, doc_id, chunk_size=2000, chunk_overlap=300)
             self.chunked_documents[filename] = chunks
             # Add chunks to vector store

retriever/llm_manager.py CHANGED Viewed

@@ -109,7 +109,7 @@ class LLMManager:
             result = qa_chain.invoke({"query": question})
             response = result['result']
             source_docs = result['source_documents']
-            logging.info(f"Generated response for question: {question} : {response}")
             return response, source_docs
         except Exception as e:
             logging.error(f"Error during QA chain invocation: {str(e)}")

             result = qa_chain.invoke({"query": question})
             response = result['result']
             source_docs = result['source_documents']
+            #logging.info(f"Generated response for question: {question} : {response}")
             return response, source_docs
         except Exception as e:
             logging.error(f"Error during QA chain invocation: {str(e)}")

retriever/vector_store_manager.py CHANGED Viewed

@@ -26,13 +26,15 @@ class VectorStoreManager:
                 allow_dangerous_deserialization=True
             )
         else:
-            logging.info("Creating new vector store")
             # Return an empty vector store; it will be populated when documents are added
             return FAISS.from_texts(
                 texts=[""],  # Dummy text to initialize
                 embedding=self.embedding_model,
                 metadatas=[{"source": "init", "doc_id": "init"}]
-            )
     def add_documents(self, documents):
         """
@@ -48,10 +50,16 @@ class VectorStoreManager:
         metadatas = [{'source': doc['source'], 'doc_id': doc['doc_id']} for doc in documents]
         logging.info("Adding new documents to vector store")
-        self.vector_store.add_texts(
-            texts=texts,
-            metadatas=metadatas
-        )
         self.vector_store.save_local(self.embedding_path)
         logging.info(f"Vector store updated and saved to {self.embedding_path}")
@@ -71,7 +79,7 @@ class VectorStoreManager:
             return []
         try:
             # Define a filter function to match doc_id
             filter_fn = lambda metadata: metadata['doc_id'] == doc_id

                 allow_dangerous_deserialization=True
             )
         else:
+            '''logging.info("Creating new vector store")
             # Return an empty vector store; it will be populated when documents are added
             return FAISS.from_texts(
                 texts=[""],  # Dummy text to initialize
                 embedding=self.embedding_model,
                 metadatas=[{"source": "init", "doc_id": "init"}]
+            )'''
+            logging.info("Creating new vector store (unpopulated)")
+            return None
     def add_documents(self, documents):
         """
         metadatas = [{'source': doc['source'], 'doc_id': doc['doc_id']} for doc in documents]
         logging.info("Adding new documents to vector store")
+        if not self.vector_store:
+            self.vector_store = FAISS.from_texts(
+                texts=texts,
+                embedding=self.embedding_model,
+                metadatas=metadatas
+            )
+        else:
+            self.vector_store.add_texts(texts=texts, metadatas=metadatas)
         self.vector_store.save_local(self.embedding_path)
         logging.info(f"Vector store updated and saved to {self.embedding_path}")
             return []
         try:
+            query = " ".join(query.lower().split())
             # Define a filter function to match doc_id
             filter_fn = lambda metadata: metadata['doc_id'] == doc_id