MikeMann committed on
Commit 1ea56a7 · 1 Parent(s): 5396439

added EvalDataset Generation

Files changed (1):
  app.py  +9 -203
app.py CHANGED
@@ -21,6 +21,7 @@ import re
 import csv
 import json
 import gc
+import multiprocessing
 
 from openai import OpenAI
 from langchain.text_splitter import RecursiveCharacterTextSplitter
@@ -67,7 +68,6 @@ class BSIChatbot:
     llmpipeline = None
     llmtokenizer = None
     vectorstore = None
-    streamer = None
     images = [None]
 
     # model_paths = {
@@ -94,6 +94,12 @@ class BSIChatbot:
         self.embedding_model = None
         #self.vectorstore: VectorStore = None
 
+    def cleanResources(self):
+        multiprocessing.active_children()
+        multiprocessing.resource_tracker.unregister('Semaphore')
+        torch.cuda.empty_cache()
+        gc.collect()
+
     def initializeEmbeddingModel(self, new_embedding):
         global vectorstore
         RAW_KNOWLEDGE_BASE = []
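The new cleanResources helper appears aimed at worker processes and leaked semaphore handles left over from multi-process embedding, plus CUDA cache growth between requests. Below is a minimal standalone sketch of the same cleanup idea; the clean_resources function and its leaked_semaphores argument are illustrative only, and note that the standard-library resource_tracker.unregister expects both a resource name and a resource type string.

import gc
import multiprocessing
from multiprocessing import resource_tracker

import torch


def clean_resources(leaked_semaphores=()):
    # Reap any finished worker processes that are still registered.
    multiprocessing.active_children()
    # Drop tracker entries for named semaphores that workers left behind;
    # the stdlib call wants (name, rtype), e.g. ("/mp-abc123", "semaphore").
    for name in leaked_semaphores:
        resource_tracker.unregister(name, "semaphore")
    # Return cached CUDA blocks to the allocator and collect Python garbage.
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    gc.collect()

In app.py the equivalent steps live on the class, and the commit wires the call into the non-streaming query path further down.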
@@ -105,7 +111,7 @@ class BSIChatbot:
         #Embedding, Vector generation and storing:
         self.embedding_model = HuggingFaceEmbeddings(
             model_name=self.word_and_embed_model_path,
-            multi_process=True,
+            multi_process=False,
             model_kwargs={"device": "cuda"},
             encode_kwargs={"normalize_embeddings": True}, # Set `True` for cosine similarity
         )
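Setting multi_process=False keeps embedding in the calling process, so sentence-transformers never spawns the worker pool whose handles would otherwise need cleanup. A brief sketch of the single-process setup, assuming the langchain_community import path and using a placeholder model name where app.py passes self.word_and_embed_model_path:

from langchain_community.embeddings import HuggingFaceEmbeddings

embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",  # placeholder model
    multi_process=False,  # encode in this process, no worker pool to tear down
    model_kwargs={"device": "cuda"},
    encode_kwargs={"normalize_embeddings": True},  # unit-length vectors for cosine similarity
)

vectors = embedding_model.embed_documents(["BSI IT-Grundschutz example text"])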
@@ -297,30 +303,6 @@ class BSIChatbot:
         #print(all_documents)
         return all_documents
 
-    def initializeLLM(self):
-        bnb_config = BitsAndBytesConfig(
-            load_in_8bit=True,
-            #bnb_8bit_use_double_quant=True,
-            #bnb_8bit_quant_type="nf4",
-            #bnb_8bit_compute_dtype=torch.bfloat16,
-        )
-        llm = AutoModelForCausalLM.from_pretrained(
-            self.llm_path, quantization_config=bnb_config
-        )
-        self.llmtokenizer = AutoTokenizer.from_pretrained(self.llm_path)
-        self.streamer=TextIteratorStreamer(self.llmtokenizer, skip_prompt=True)
-        self.llmpipeline = pipeline(
-            model=llm,
-            tokenizer=self.llmtokenizer,
-            task="text-generation",
-            do_sample=True,
-            temperature=0.7,
-            repetition_penalty=1.1,
-            return_full_text=False,
-            streamer=self.streamer,
-            max_new_tokens=500,
-        )
-
     def queryLLM(self,query):
         #resp = self.llmpipeline(chat) Fixen
         return(self.llmpipeline(query)[0]["generated_text"])
@@ -506,185 +488,9 @@ class BSIChatbot:
             messages=final_prompt,
             model=self.llm_remote_model,
             stream=False)
+        self.cleanResources()
         return answer, context
 
-    #@spaces.GPU
-    def ragPromptRemote(self, query, rerankingStep, history, stepBackPrompt):
-        global rerankingModel
-        prompt_in_chat_format = [
-            {
-                "role": "system",
-                "content": """You are an helpful Chatbot for the BSI IT-Grundschutz. Using the information contained in the context,
-        give a comprehensive answer to the question.
-        Respond only to the question asked, response should be concise and relevant but also give some context to the question.
-        Provide the source document when relevant for the understanding.
-        If the answer cannot be deduced from the context, do not give an answer.""",
-            },
-            {
-                "role": "user",
-                "content": """Context:
-        {context}
-        ---
-        Chat-History:
-        {history}
-        ---
-        Now here is the question you need to answer.
-
-        Question: {question}""",
-            },
-        ]
-        #RAG_PROMPT_TEMPLATE = self.llmtokenizer.apply_chat_template(
-        #    prompt_in_chat_format, tokenize=False, add_generation_prompt=True
-        #)
-        retrieved_chunks = self.retrieveSimiliarEmbedding(query)
-        retrieved_chunks_text = []
-        #TODO Irgendwas stimmt hier mit den Listen nicht
-        for chunk in retrieved_chunks:
-            #TODO Hier noch was smarteres Überlegen für alle Header
-            if "Header 1" in chunk.metadata.keys():
-                retrieved_chunks_text.append(f"The Document is: '{chunk.metadata['source']}'\nHeader of the Section is: '{chunk.metadata['Header 1']}' and Content of it:{chunk.page_content}")
-            else:
-                retrieved_chunks_text.append(
-                    f"The Document is: '{chunk.metadata['source']}'\nImage Description is: ':{chunk.page_content}")
-        i=1
-        #newfor chunk in retrieved_chunks_text:
-            #newprint(f"Retrieved Chunk number {i}:\n{chunk}")
-            #newi=i+1
-
-        if rerankingStep==True:
-            if rerankingModel == None:
-                print ("initializing Reranker-Model..")
-                self.initializeRerankingModel()
-            print("Starting Reranking Chunks...")
-            rerankingModel
-            retrieved_chunks_text=rerankingModel.rerank(query, retrieved_chunks_text,k=5)
-            retrieved_chunks_text=[chunk["content"] for chunk in retrieved_chunks_text]
-
-        i = 1
-        #newfor chunk in retrieved_chunks_text:
-            #newprint(f"Reranked Chunk number {i}:\n{chunk}")
-            #newi = i + 1
-
-        context = "\nExtracted documents:\n"
-        context += "".join([doc for i, doc in enumerate(retrieved_chunks_text)])
-        #Alles außer letzte Useranfrage
-        prompt_in_chat_format[-1]["content"] = prompt_in_chat_format[-1]["content"].format(
-            question=query, context=context, history=history[:-1]
-        )
-        final_prompt = prompt_in_chat_format
-        #final_prompt = prompt_in_chat_format[-1]["content"].format(
-        #    question=query, context=context, history=history[:-1]
-        #)
-
-        print(f"Query:\n{final_prompt}")
-        pattern = r"Filename:(.*?);"
-        last_value = final_prompt[-1]["content"]
-
-
-        match = re.findall(pattern, last_value)
-        self.images=match
-
-        stream = self.llm_client.chat.completions.create(
-            messages=final_prompt,
-            model=self.llm_remote_model,
-            stream=True
-        )
-        return stream
-        #generation_thread = threading.Thread(target=self.llmpipeline, args=(final_prompt,))
-        #generation_thread.start()
-
-        #return self.streamer
-
-        #answer=self.queryLLM(final_prompt)
-        #answer = self.llmpipeline(final_prompt)
-        #for token in answer:
-        #    print (token["generated_text"])
-        #    yield token["generated_text"]
-        #    gen = queryModel.stream(final_prompt)
-
-
-        #return gen
-
-        #print (f"Answer:\n{answer}")
-
-    def ragPrompt(self, query, rerankingStep, history):
-        global rerankingModel
-        prompt_in_chat_format = [
-            {
-                "role": "system",
-                "content": """You are an helpful Chatbot for the BSI IT-Grundschutz. Using the information contained in the context,
-        give a comprehensive answer to the question.
-        Respond only to the question asked, response should be concise and relevant but also give some context to the question.
-        Provide the source document when relevant for the understanding.
-        If the answer cannot be deduced from the context, do not give an answer.""",
-            },
-            {
-                "role": "user",
-                "content": """Context:
-        {context}
-        ---
-        Chat-History:
-        {history}
-        ---
-        Now here is the question you need to answer.
-
-        Question: {question}""",
-            },
-        ]
-        RAG_PROMPT_TEMPLATE = self.llmtokenizer.apply_chat_template(
-            prompt_in_chat_format, tokenize=False, add_generation_prompt=True
-        )
-        retrieved_chunks = self.retrieveSimiliarEmbedding(query)
-        retrieved_chunks_text = []
-        #TODO Irgendwas stimmt hier mit den Listen nicht
-        for chunk in retrieved_chunks:
-            #TODO Hier noch was smarteres Überlegen für alle Header
-            if "Header 1" in chunk.metadata.keys():
-                retrieved_chunks_text.append(f"The Document is: '{chunk.metadata['source']}'\nHeader of the Section is: '{chunk.metadata['Header 1']}' and Content of it:{chunk.page_content}")
-            else:
-                retrieved_chunks_text.append(
-                    f"The Document is: '{chunk.metadata['source']}'\nImage Description is: ':{chunk.page_content}")
-        i=1
-        for chunk in retrieved_chunks_text:
-            #newprint(f"Retrieved Chunk number {i}:\n{chunk}")
-            i=i+1
-
-        if rerankingStep==True:
-            if rerankingModel == None:
-                print ("initializing Reranker-Model..")
-                self.initializeRerankingModel()
-            print("Starting Reranking Chunks...")
-            rerankingModel
-            retrieved_chunks_text=rerankingModel.rerank(query, retrieved_chunks_text,k=15)
-            #REVERSE Rerank results!
-            #newprint("DBG: Rankorder:")
-            #newfor chunk in reversed(retrieved_chunks_text):
-                #newprint(chunk.rank)
-            retrieved_chunks_text=[chunk["content"] for chunk in reversed(retrieved_chunks_text)]
-
-        i = 1
-        for chunk in retrieved_chunks_text:
-            print(f"Reranked Chunk number {i}:\n{chunk}")
-            i = i + 1
-
-        context = "\nExtracted documents:\n"
-        context += "".join([doc for i, doc in enumerate(retrieved_chunks_text)])
-        #Alles außer letzte Useranfrage
-        final_prompt = RAG_PROMPT_TEMPLATE.format(
-            question=query, context=context, history=history[:-1]
-        )
-
-        print(f"Query:\n{final_prompt}")
-        pattern = r"Filename:(.*?);"
-        match = re.findall(pattern, final_prompt)
-        self.images=match
-
-        #queryModel = HuggingFacePipeline(pipeline = self.llmpipeline)
-        generation_thread = threading.Thread(target=self.llmpipeline, args=(final_prompt,))
-        generation_thread.start()
-
-        return self.streamer
-
     def returnImages(self):
         imageList = []
         for image in self.images:
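With the local pipeline methods gone, the non-streaming remote path is the one that now calls cleanResources before returning. The sketch below shows that call shape with the OpenAI-compatible client; the answer_non_streaming helper is hypothetical, and the try/finally is an assumption of the sketch rather than something the commit adds.

from openai import OpenAI


def answer_non_streaming(client: OpenAI, model: str, final_prompt: list, cleanup) -> str:
    # Blocking chat completion followed by the cleanup hook; try/finally makes
    # sure GPU cache and worker handles are released even if the request raises.
    try:
        completion = client.chat.completions.create(
            messages=final_prompt,
            model=model,
            stream=False,
        )
        return completion.choices[0].message.content
    finally:
        cleanup()

Inside the class this would be invoked roughly as answer_non_streaming(self.llm_client, self.llm_remote_model, final_prompt, self.cleanResources).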
 