lib search API ready
- app_modules/init.py +16 -2
- app_modules/llm_chat_chain.py +1 -1
- app_modules/llm_inference.py +12 -5
- app_modules/llm_qa_chain.py +12 -3
- app_modules/llm_summarize_chain.py +1 -1
- server.py +13 -6
- test.py +7 -4
- web +1 -1
app_modules/init.py
CHANGED

```diff
@@ -79,14 +79,28 @@ def app_init(initQAChain: bool = True):
 
     print(f"Completed in {end - start:.3f}s")
 
-    vectorstore = load_vectorstor(index_path)
+    vectorstore = load_vectorstor(using_faiss, index_path, embeddings)
+
+    doc_id_to_vectorstore_mapping = {}
+    rootdir = index_path
+    for file in os.listdir(rootdir):
+        d = os.path.join(rootdir, file)
+        if os.path.isdir(d):
+            v = load_vectorstor(using_faiss, d, embeddings)
+            doc_id_to_vectorstore_mapping[file] = v
+
+    # print(doc_id_to_vectorstore_mapping)
 
     start = timer()
     llm_loader = LLMLoader(llm_model_type)
    llm_loader.init(
        n_threds=n_threds, hf_pipeline_device_type=hf_pipeline_device_type
    )
-    qa_chain = QAChain(vectorstore, llm_loader) if initQAChain else None
+    qa_chain = (
+        QAChain(vectorstore, llm_loader, doc_id_to_vectorstore_mapping)
+        if initQAChain
+        else None
+    )
     end = timer()
     print(f"Completed in {end - start:.3f}s")
 
```
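The loop above assumes the index folder holds one sub-folder per document id, each containing its own persisted index, alongside the combined index at the top level. A minimal sketch of what the `load_vectorstor` helper could look like under that assumption (the FAISS/Chroma calls follow common LangChain usage; the repo's actual implementation may differ):

```python
from langchain.vectorstores import FAISS, Chroma


def load_vectorstor(using_faiss: bool, index_path: str, embeddings):
    """Sketch only: load one persisted index folder as a vectorstore."""
    if using_faiss:
        # A FAISS index previously written with FAISS.save_local(index_path).
        return FAISS.load_local(index_path, embeddings)
    # Otherwise treat the folder as a persisted Chroma collection.
    return Chroma(embedding_function=embeddings, persist_directory=index_path)
```

With such a layout, `doc_id_to_vectorstore_mapping["paper_001"]` (an illustrative id) points at the index built from just that document, while `vectorstore` continues to cover the whole corpus.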
app_modules/llm_chat_chain.py
CHANGED

```diff
@@ -27,7 +27,7 @@ class ChatChain(LLMInference):
     def __init__(self, llm_loader):
         super().__init__(llm_loader)
 
-    def create_chain(self) -> Chain:
+    def create_chain(self, inputs) -> Chain:
         template = (
             get_llama_2_prompt_template()
             if os.environ.get("USE_LLAMA_2_PROMPT_TEMPLATE") == "true"
```
app_modules/llm_inference.py
CHANGED

```diff
@@ -22,12 +22,12 @@ class LLMInference(metaclass=abc.ABCMeta):
         self.chain = None
 
     @abc.abstractmethod
-    def create_chain(self) -> Chain:
+    def create_chain(self, inputs) -> Chain:
         pass
 
-    def get_chain(self) -> Chain:
+    def get_chain(self, inputs) -> Chain:
         if self.chain is None:
-            self.chain = self.create_chain()
+            self.chain = self.create_chain(inputs)
 
         return self.chain
 
@@ -48,7 +48,7 @@
         try:
             self.llm_loader.streamer.reset(q)
 
-            chain = self.get_chain()
+            chain = self.get_chain(inputs)
             result = (
                 self._run_chain_with_streaming_handler(
                     chain, inputs, streaming_handler, testing
@@ -61,7 +61,14 @@
             result["answer"] = remove_extra_spaces(result["answer"])
 
             source_path = os.environ.get("SOURCE_PATH")
-            if source_path is not None and len(source_path) > 0:
+            base_url = os.environ.get("PDF_FILE_BASE_URL")
+            if base_url is not None and len(base_url) > 0:
+                documents = result["source_documents"]
+                for doc in documents:
+                    source = doc.metadata["source"]
+                    title = source.split("/")[-1]
+                    doc.metadata["url"] = f"{base_url}{urllib.parse.quote(title)}"
+            elif source_path is not None and len(source_path) > 0:
                 documents = result["source_documents"]
                 for doc in documents:
                     source = doc.metadata["source"]
```
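When `PDF_FILE_BASE_URL` is set, the new branch rewrites each source document's local path into a link. The same transformation in isolation (base URL and file name are made-up values):

```python
import urllib.parse

base_url = "https://example.com/pdfs/"        # hypothetical PDF_FILE_BASE_URL value
source = "data/pdfs/Annual Report 2022.pdf"   # a doc.metadata["source"] value

title = source.split("/")[-1]                 # "Annual Report 2022.pdf"
url = f"{base_url}{urllib.parse.quote(title)}"
print(url)  # https://example.com/pdfs/Annual%20Report%202022.pdf
```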
app_modules/llm_qa_chain.py
CHANGED

```diff
@@ -8,14 +8,23 @@ from app_modules.llm_inference import LLMInference
 class QAChain(LLMInference):
     vectorstore: VectorStore
 
-    def __init__(self, vectorstore, llm_loader):
+    def __init__(self, vectorstore, llm_loader, doc_id_to_vectorstore_mapping=None):
         super().__init__(llm_loader)
         self.vectorstore = vectorstore
+        self.doc_id_to_vectorstore_mapping = doc_id_to_vectorstore_mapping
+
+    def get_chain(self, inputs) -> Chain:
+        return self.create_chain(inputs)
+
+    def create_chain(self, inputs) -> Chain:
+        vectorstore = self.vectorstore
+        if "chat_id" in inputs:
+            if inputs["chat_id"] in self.doc_id_to_vectorstore_mapping:
+                vectorstore = self.doc_id_to_vectorstore_mapping[inputs["chat_id"]]
 
-    def create_chain(self) -> Chain:
         qa = ConversationalRetrievalChain.from_llm(
             self.llm_loader.llm,
-            self.vectorstore.as_retriever(search_kwargs=self.llm_loader.search_kwargs),
+            vectorstore.as_retriever(search_kwargs=self.llm_loader.search_kwargs),
             max_tokens_limit=self.llm_loader.max_tokens_limit,
             return_source_documents=True,
         )
```
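QAChain now overrides get_chain to rebuild the chain on every request, so the retriever can follow the chat_id carried in the inputs. A hedged usage sketch (the document id is illustrative and must match a key of doc_id_to_vectorstore_mapping):

```python
# Ask against a single document's index: "paper_001" is a made-up id that
# would correspond to a sub-folder of the index path loaded in app_init.
inputs = {"question": "Which dataset was used?", "chat_history": [], "chat_id": "paper_001"}
result = qa_chain.call_chain(inputs, None)

# Without a matching chat_id, retrieval falls back to the combined vectorstore.
inputs_all = {"question": "Which dataset was used?", "chat_history": []}
result_all = qa_chain.call_chain(inputs_all, None)
```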
app_modules/llm_summarize_chain.py
CHANGED

```diff
@@ -23,7 +23,7 @@ class SummarizeChain(LLMInference):
     def __init__(self, llm_loader):
         super().__init__(llm_loader)
 
-    def create_chain(self) -> Chain:
+    def create_chain(self, inputs) -> Chain:
         use_llama_2_prompt_template = (
             os.environ.get("USE_LLAMA_2_PROMPT_TEMPLATE") == "true"
         )
```
server.py
CHANGED

```diff
@@ -28,11 +28,11 @@ class ChatResponse(BaseModel):
 
 def do_chat(
     question: str,
-    history: Optional[List] =
+    history: Optional[List] = None,
     chat_id: Optional[str] = None,
     streaming_handler: any = None,
 ):
-    if
+    if history is not None:
         chat_history = []
         if chat_history_enabled:
             for element in history:
@@ -41,7 +41,8 @@ def do_chat(
 
     start = timer()
     result = qa_chain.call_chain(
-        {"question": question, "chat_history": chat_history},
+        {"question": question, "chat_history": chat_history, "chat_id": chat_id},
+        streaming_handler,
     )
     end = timer()
     print(f"Completed in {end - start:.3f}s")
@@ -61,20 +62,26 @@
 
 @serving(websocket=True)
 def chat(
-    question: str,
+    question: str,
+    history: Optional[List] = None,
+    chat_id: Optional[str] = None,
+    **kwargs,
 ) -> str:
     print("question@chat:", question)
     streaming_handler = kwargs.get("streaming_handler")
     result = do_chat(question, history, chat_id, streaming_handler)
     resp = ChatResponse(
-        sourceDocs=result["source_documents"] if
+        sourceDocs=result["source_documents"] if history is not None else []
     )
     return json.dumps(resp.dict())
 
 
 @serving
 def chat_sync(
-    question: str,
+    question: str,
+    history: Optional[List] = None,
+    chat_id: Optional[str] = None,
+    **kwargs,
 ) -> str:
     print("question@chat_sync:", question)
     result = do_chat(question, history, chat_id, None)
```
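A sketch of how the widened signatures flow through do_chat; the history entries and document id are illustrative, and the exact shape of history elements depends on code outside this diff:

```python
# Illustrative call, mirroring what the chat/chat_sync endpoints now do.
history = [("What is the paper about?", "It studies retrieval-augmented chat.")]  # assumed (q, a) pairs
result = do_chat(
    "Which model was fine-tuned?",
    history=history,
    chat_id="paper_001",       # hypothetical document id
    streaming_handler=None,
)
print(result["answer"])
# "url" is only present when PDF_FILE_BASE_URL is configured (see llm_inference.py).
print([doc.metadata.get("url") for doc in result["source_documents"]])
```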
test.py
CHANGED

```diff
@@ -30,6 +30,7 @@ class MyCustomHandler(BaseCallbackHandler):
 
 
 chatting = len(sys.argv) > 1 and sys.argv[1] == "chat"
+chat_id = sys.argv[2] if len(sys.argv) > 2 else None
 questions_file_path = os.environ.get("QUESTIONS_FILE_PATH")
 chat_history_enabled = os.environ.get("CHAT_HISTORY_ENABLED") or "true"
 
@@ -68,8 +69,9 @@ while True:
     custom_handler.reset()
 
     start = timer()
+    inputs = {"question": query, "chat_history": chat_history, "chat_id": chat_id}
     result = qa_chain.call_chain(
-        {"question": query, "chat_history": chat_history},
+        inputs,
         custom_handler,
         None,
         True,
@@ -87,13 +89,14 @@
     if standalone_question is not None:
         print(f"Load relevant documents for standalone question: {standalone_question}")
         start = timer()
-        qa = qa_chain.get_chain()
+        qa = qa_chain.get_chain(inputs)
         docs = qa.retriever.get_relevant_documents(standalone_question)
         end = timer()
-
-        # print(docs)
         print(f"Completed in {end - start:.3f}s")
 
+    if chatting:
+        print(docs)
+
     if chat_history_enabled == "true":
         chat_history.append((query, result["answer"]))
 
```
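With the extra positional argument, the test script can target one of the per-document indexes, e.g. `python test.py chat paper_001` (the id is illustrative and must match a sub-folder of the index path). The id is forwarded as `chat_id` in every `call_chain` input, and the retrieved documents are printed after each answer when chatting.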
web
CHANGED

```diff
@@ -1 +1 @@
-Subproject commit
+Subproject commit 15f2b72afe6170badfb982c7adba585af30d578a
```