added stepback-Prompting
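Step-back prompting reformulates the user's concrete question into a more general question about the underlying concept, answers that first, and feeds the answer back in as additional context for the original question. In outline, the flow wired in below looks like this (a minimal standalone sketch, not the exact app.py code; the client setup and model name are placeholders for the app's llm_client and llm_remote_model):

    from openai import OpenAI  # assumption: llm_client is an OpenAI-compatible client

    client = OpenAI()       # placeholder setup
    MODEL = "remote-model"  # placeholder for llm_remote_model

    def step_back(query: str) -> str:
        # Reformulate a specific question into a broader "step-back" question,
        # mirroring stepBackPrompt() in app.py (system prompt abridged).
        system = ("Sie sind ein Experte für den IT-Grundschutz des BSI. "
                  "Formulieren Sie die Frage in eine Stepback-Frage um, "
                  "die nach einem Grundkonzept der Begrifflichkeit fragt.")
        resp = client.chat.completions.create(
            model=MODEL,
            messages=[{"role": "system", "content": system},
                      {"role": "user", "content": query}],
        )
        return resp.choices[0].message.content

    # ragPromptNew() retrieves chunks for both the original and the step-back
    # question, answers the step-back question via queryRemoteLLM(), and
    # appends that answer to the context before the final streamed completion.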
app.py CHANGED
@@ -291,8 +291,114 @@ class BSIChatbot:
         global rerankingModel
         rerankingModel = RAGPretrainedModel.from_pretrained(self.rerankModelPath)
 
+
+    def retrieval(self, query, rerankingStep):
+        retrieved_chunks = self.retrieveSimiliarEmbedding(query)
+        retrieved_chunks_text = []
+        # TODO Something is still wrong with the lists here
+        for chunk in retrieved_chunks:
+            # TODO Come up with something smarter here for all header levels
+            if "Header 1" in chunk.metadata.keys():
+                retrieved_chunks_text.append(
+                    f"The Document is: '{chunk.metadata['source']}'\nHeader of the Section is: '{chunk.metadata['Header 1']}' and Content of it:{chunk.page_content}")
+            else:
+                retrieved_chunks_text.append(
+                    f"The Document is: '{chunk.metadata['source']}'\nImage Description is: '{chunk.page_content}'")
+        for i, chunk in enumerate(retrieved_chunks_text, start=1):
+            print(f"Retrieved Chunk number {i}:\n{chunk}")
+
+        if rerankingStep == True:
+            if rerankingModel is None:
+                print("initializing Reranker-Model..")
+                self.initializeRerankingModel()
+            print("Starting Reranking Chunks...")
+            retrieved_chunks_text = rerankingModel.rerank(query, retrieved_chunks_text, k=5)
+            retrieved_chunks_text = [chunk["content"] for chunk in retrieved_chunks_text]
+
+            for i, chunk in enumerate(retrieved_chunks_text, start=1):
+                print(f"Reranked Chunk number {i}:\n{chunk}")
+
+        context = "\nExtracted documents:\n"
+        context += "".join(retrieved_chunks_text)
+
+        return query, context
+
+    def ragPromptNew(self, query, rerankingStep, history, stepBackPrompt):
+        global rerankingModel
+        prompt_in_chat_format = [
+            {
+                "role": "system",
+                "content": """You are a helpful Chatbot for the BSI IT-Grundschutz. Using the information contained in the context,
+give a comprehensive answer to the question.
+Respond only to the question asked, response should be concise and relevant but also give some context to the question.
+Provide the source document when relevant for the understanding.
+If the answer cannot be deduced from the context, do not give an answer.""",
+            },
+            {
+                "role": "user",
+                "content": """Context:
+{context}
+---
+Chat-History:
+{history}
+---
+Now here is the question you need to answer.
+
+Question: {question}""",
+            },
+        ]
+        # RAG_PROMPT_TEMPLATE = self.llmtokenizer.apply_chat_template(
+        #     prompt_in_chat_format, tokenize=False, add_generation_prompt=True
+        # )
+
+        # Everything except the last user request; run the plain query first
+        query, context = self.retrieval(query, True)
+        if stepBackPrompt == True:
+            stepBackQuery = self.stepBackPrompt(query)
+            stepBackQuery, stepBackContext = self.retrieval(stepBackQuery, True)
+            sysPrompt = """
+            You are a helpful Chatbot for the BSI IT-Grundschutz. Using the information contained in the context,
+            give a comprehensive answer to the question.
+            Respond only to the question asked, response should be concise and relevant but also give some context to the question.
+            Provide the source document when relevant for the understanding.
+            If the answer cannot be deduced from the context, do not give an answer.
+            """
+            stepBackAnswer = self.queryRemoteLLM(sysPrompt, stepBackQuery, True)
+            context += "Übergreifende Frage:" + stepBackQuery + "Übergreifender Context:" + stepBackAnswer
+
+        prompt_in_chat_format[-1]["content"] = prompt_in_chat_format[-1]["content"].format(
+            question=query, context=context, history=history[:-1]
+        )
+        final_prompt = prompt_in_chat_format
+
+        # final_prompt = prompt_in_chat_format[-1]["content"].format(
+        #     question=query, context=context, history=history[:-1]
+        # )
+
+        print(f"Query:\n{final_prompt}")
+        pattern = r"Filename:(.*?);"
+        last_value = final_prompt[-1]["content"]
+
+        match = re.findall(pattern, last_value)
+        self.images = match
+
+        stream = self.llm_client.chat.completions.create(
+            messages=final_prompt,
+            model=self.llm_remote_model,
+            stream=True
+        )
+        return stream
     #@spaces.GPU
-    def ragPromptRemote(self, query, rerankingStep, history):
+    def ragPromptRemote(self, query, rerankingStep, history, stepBackPrompt):
         global rerankingModel
         prompt_in_chat_format = [
             {
@@ -477,19 +583,26 @@ class BSIChatbot:
     def launchGr(self):
         gr.Interface.from_pipeline(self.llmpipeline).launch()
 
-    def queryRemoteLLM(self, systemPrompt, query):
-
-
-
-
-
+    def queryRemoteLLM(self, systemPrompt, query, summary):
+        if summary != True:
+            chat_completion = self.llm_client.chat.completions.create(
+                messages=[{"role": "system", "content": systemPrompt},
+                          {"role": "user", "content": "Step-Back Frage, die neu gestellt werden soll: " + query}],
+                model=self.llm_remote_model,
+            )
+        if summary == True:
+            chat_completion = self.llm_client.chat.completions.create(
+                messages=[{"role": "system", "content": systemPrompt},
+                          {"role": "user", "content": query}],
+                model=self.llm_remote_model,
+            )
+        Answer = chat_completion.choices[0].message.content  # extract the reply text for the caller
         return Answer
 
     def stepBackPrompt(self, query):
         systemPrompt = """
         Sie sind ein Experte für den IT-Grundschutz des BSI.
         Ihre Aufgabe ist es, eine Frage neu zu formulieren und sie in eine
-
+        Stepback-Frage umzuformulieren, die nach einem Grundkonzept der Begrifflichkeit fragt.
 
         Hier sind ein paar Beispiele:
         Ursprüngliche Frage: Welche Bausteine werden auf einen Webserver angewendet?
@@ -516,6 +629,8 @@ if __name__ == '__main__':
 
     renewEmbeddings = False
    reranking = True
+    stepBackEnable = True
+
    bot = BSIChatbot()
    bot.initializeEmbeddingModel(renewEmbeddings)
    if reranking == True:
@@ -564,7 +679,8 @@ if __name__ == '__main__':
         print(f"DBG: ragQuery hist -1:{history[-1].get('content')}")
         print(f"DBG: ragQuery hist 0:{history[0].get('content')}")
         print(f"DBG: fullHistory: {history}")
-        bot_response = bot.ragPromptRemote(history[-1].get('content'), reranking, history)
+        #bot_response = bot.ragPromptRemote(history[-1].get('content'), reranking, history)
+        bot_response = bot.ragPromptNew(history[-1].get('content'), reranking, history, stepBackEnable)
         history.append({"role": "assistant", "content": ""})
 
         image_gallery = returnImages()
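ragPromptNew returns the raw stream object from the chat completions call; the caller appends an empty assistant message to the history, which is then filled as the stream arrives. A minimal sketch of draining an OpenAI-style stream into that message (assuming the standard streaming delta shape):

    for chunk in bot_response:
        delta = chunk.choices[0].delta.content  # None for role/finish chunks
        if delta:
            history[-1]["content"] += delta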