Added RemoteRag with QWEN
app.py
CHANGED
@@ -18,6 +18,7 @@ import gradio as gr
 import threading
 import re
 
+from openai import OpenAI
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_community.docstore import InMemoryDocstore
 from langchain_community.document_loaders import TextLoader

@@ -50,6 +51,7 @@ from huggingface_hub import login
 
 login(token=HF_KEY)
 
+SAIA_KEY = SAIA_KEY
 vectorstore=None
 rerankingModel=None
 

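As committed, SAIA_KEY = SAIA_KEY only re-binds a name that must already be defined earlier in app.py; presumably the key is read from a Space secret in the same way as HF_KEY. A minimal sketch of that assumption (not part of the commit):

import os

# Assumption: the Academic Cloud (SAIA) API key is provided as a Space secret.
SAIA_KEY = os.environ.get("SAIA_KEY")
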
@@ -67,6 +69,13 @@ class BSIChatbot:
     # 'rerank_model_path': 'domci/ColBERTv2-mmarco-de-0.1'
     # }
 
+    llm_base_url = "https://chat-ai.academiccloud.de/v1"
+    llm_remote_model = "qwen2.5-72b-instruct"
+    llm_client = OpenAI(
+        api_key = SAIA_KEY,
+        base_url = llm_base_url
+    )
+
     llm_path = "meta-llama/Llama-3.2-3B-Instruct"
     word_and_embed_model_path = "intfloat/multilingual-e5-large-instruct"
     docs = "/home/user/app/docs"

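The new attributes point an OpenAI client at Academic Cloud's OpenAI-compatible endpoint. A minimal, self-contained sketch of exercising the remote model (not part of the commit; same base URL and model name as above, SAIA_KEY assumed to hold a valid key):

from openai import OpenAI

client = OpenAI(api_key=SAIA_KEY, base_url="https://chat-ai.academiccloud.de/v1")

response = client.chat.completions.create(
    model="qwen2.5-72b-instruct",
    messages=[{"role": "user", "content": "What is the BSI IT-Grundschutz?"}],
)
print(response.choices[0].message.content)
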
@@ -284,7 +293,7 @@ class BSIChatbot:
         rerankingModel = RAGPretrainedModel.from_pretrained(self.rerankModelPath)
 
     #@spaces.GPU
-    def ragPrompt(self, query, rerankingStep, history):
+    def ragPromptRemote(self, query, rerankingStep, history):
         global rerankingModel
         prompt_in_chat_format = [
             {

@@ -352,11 +361,16 @@ class BSIChatbot:
         match = re.findall(pattern, final_prompt)
         self.images=match
 
-        #queryModel = HuggingFacePipeline(pipeline = self.llmpipeline)
-        generation_thread = threading.Thread(target=self.llmpipeline, args=(final_prompt,))
-        generation_thread.start()
+        stream = llm_client.chat.completions.create(
+            messages=final_prompt,
+            model=llm_remote_model,
+            stream=True
+        )
+        return stream
+        #generation_thread = threading.Thread(target=self.llmpipeline, args=(final_prompt,))
+        #generation_thread.start()
 
-        return self.streamer
+        #return self.streamer
 
         #answer=self.queryLLM(final_prompt)
         #answer = self.llmpipeline(final_prompt)

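With this change, ragPromptRemote returns the OpenAI SDK's streaming iterator instead of self.streamer. Note that chat.completions.create expects messages as a list of role/content dicts, so the rendered final_prompt would normally be wrapped, e.g. [{"role": "user", "content": final_prompt}]. A minimal caller-side sketch of consuming the returned stream (not part of the commit; names assumed):

# `bot` is a BSIChatbot instance; each chunk carries an incremental delta.
stream = bot.ragPromptRemote("What is an IT-Grundschutz module?", True, [])
answer = ""
for chunk in stream:
    delta = chunk.choices[0].delta.content
    if delta:  # the final chunk carries no content
        answer += delta
        print(delta, end="", flush=True)
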
@@ -370,6 +384,80 @@ class BSIChatbot:
 
         #print (f"Answer:\n{answer}")
 
+    def ragPrompt(self, query, rerankingStep, history):
+        global rerankingModel
+        prompt_in_chat_format = [
+            {
+                "role": "system",
+                "content": """You are a helpful Chatbot for the BSI IT-Grundschutz. Using the information contained in the context,
+give a comprehensive answer to the question.
+Respond only to the question asked, response should be concise and relevant but also give some context to the question.
+Provide the source document when relevant for the understanding.
+If the answer cannot be deduced from the context, do not give an answer.""",
+            },
+            {
+                "role": "user",
+                "content": """Context:
+{context}
+---
+Chat-History:
+{history}
+---
+Now here is the question you need to answer.
+
+Question: {question}""",
+            },
+        ]
+        RAG_PROMPT_TEMPLATE = self.llmtokenizer.apply_chat_template(
+            prompt_in_chat_format, tokenize=False, add_generation_prompt=True
+        )
+        retrieved_chunks = self.retrieveSimiliarEmbedding(query)
+        retrieved_chunks_text = []
+        #TODO Something is still wrong with the lists here
+        for chunk in retrieved_chunks:
+            #TODO Think of something smarter here for all headers
+            if "Header 1" in chunk.metadata.keys():
+                retrieved_chunks_text.append(f"The Document is: '{chunk.metadata['source']}'\nHeader of the Section is: '{chunk.metadata['Header 1']}' and Content of it:{chunk.page_content}")
+            else:
+                retrieved_chunks_text.append(
+                    f"The Document is: '{chunk.metadata['source']}'\nImage Description is: '{chunk.page_content}'")
+        i=1
+        for chunk in retrieved_chunks_text:
+            print(f"Retrieved Chunk number {i}:\n{chunk}")
+            i=i+1
+
+        if rerankingStep==True:
+            if rerankingModel == None:
+                print ("initializing Reranker-Model..")
+                self.initializeRerankingModel()
+            print("Starting Reranking Chunks...")
+            rerankingModel
+            retrieved_chunks_text=rerankingModel.rerank(query, retrieved_chunks_text,k=5)
+            retrieved_chunks_text=[chunk["content"] for chunk in retrieved_chunks_text]
+
+            i = 1
+            for chunk in retrieved_chunks_text:
+                print(f"Reranked Chunk number {i}:\n{chunk}")
+                i = i + 1
+
+        context = "\nExtracted documents:\n"
+        context += "".join([doc for i, doc in enumerate(retrieved_chunks_text)])
+        #Everything except the last user message
+        final_prompt = RAG_PROMPT_TEMPLATE.format(
+            question=query, context=context, history=history[:-1]
+        )
+
+        print(f"Query:\n{final_prompt}")
+        pattern = r"Filename:(.*?);"
+        match = re.findall(pattern, final_prompt)
+        self.images=match
+
+        #queryModel = HuggingFacePipeline(pipeline = self.llmpipeline)
+        generation_thread = threading.Thread(target=self.llmpipeline, args=(final_prompt,))
+        generation_thread.start()
+
+        return self.streamer
+
     def returnImages(self):
         imageList = []
         for image in self.images:

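The re-added local ragPrompt keeps the previous pattern: generation runs on a background thread and the method returns self.streamer. That matches the transformers TextIteratorStreamer idiom; a self-contained sketch of that pattern (not part of the commit, assuming llmpipeline and streamer are wired up along these lines elsewhere in app.py):

import threading
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

model_name = "meta-llama/Llama-3.2-3B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

inputs = tokenizer("Question: What is IT-Grundschutz?", return_tensors="pt")

# generate() blocks, so it runs on a worker thread while the caller iterates the streamer.
thread = threading.Thread(
    target=model.generate,
    kwargs={**inputs, "streamer": streamer, "max_new_tokens": 256},
)
thread.start()
for text in streamer:
    print(text, end="", flush=True)
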
@@ -441,7 +529,7 @@ if __name__ == '__main__':
         print(f"ragQuery hist -1:{history[-1].get('content')}")
         print(f"ragQuery hist 0:{history[0].get('content')}")
         print(f"fullHistory: {history}" )
-        bot_response = bot.ragPrompt(history[-1].get('content'), reranking, history)
+        bot_response = bot.ragPromptRemote(history[-1].get('content'), reranking, history)
         history.append({"role": "assistant", "content": ""})
 
         image_gallery = returnImages()
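Downstream of this call (not shown in the diff), the empty assistant message appended to history is presumably filled by iterating bot_response. A sketch of such a loop for the remote case, with the helper name and yield signature assumed:

def stream_into_history(bot_response, history, image_gallery):
    # Assumption: OpenAI-style stream chunks; yields the growing history for Gradio to render.
    for chunk in bot_response:
        delta = chunk.choices[0].delta.content
        if delta:
            history[-1]["content"] += delta
            yield history, image_gallery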