MikeMann committed on
Commit 1ea56a7 · 1 Parent(s): 5396439

added EvalDataset Generation

Files changed (1):
  app.py  +9 -203
app.py CHANGED
@@ -21,6 +21,7 @@ import re
 import csv
 import json
 import gc
+import multiprocessing
 
 from openai import OpenAI
 from langchain.text_splitter import RecursiveCharacterTextSplitter
@@ -67,7 +68,6 @@ class BSIChatbot:
     llmpipeline = None
     llmtokenizer = None
     vectorstore = None
-    streamer = None
     images = [None]
 
     # model_paths = {
@@ -94,6 +94,12 @@ class BSIChatbot:
         self.embedding_model = None
         #self.vectorstore: VectorStore = None
 
+    def cleanResources(self):
+        multiprocessing.active_children()
+        multiprocessing.resource_tracker.unregister('Semaphore')
+        torch.cuda.empty_cache()
+        gc.collect()
+
     def initializeEmbeddingModel(self, new_embedding):
         global vectorstore
         RAW_KNOWLEDGE_BASE = []
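The new cleanResources helper appears aimed at worker processes and leaked semaphore handles left over from multi-process embedding, plus CUDA cache growth between requests. Below is a minimal standalone sketch of the same cleanup idea; the clean_resources function and its leaked_semaphores argument are illustrative only, and note that the standard-library resource_tracker.unregister expects both a resource name and a resource type string.

import gc
import multiprocessing
from multiprocessing import resource_tracker

import torch


def clean_resources(leaked_semaphores=()):
    # Reap any finished worker processes that are still registered.
    multiprocessing.active_children()
    # Drop tracker entries for named semaphores that workers left behind;
    # the stdlib call wants (name, rtype), e.g. ("/mp-abc123", "semaphore").
    for name in leaked_semaphores:
        resource_tracker.unregister(name, "semaphore")
    # Return cached CUDA blocks to the allocator and collect Python garbage.
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    gc.collect()

In app.py the equivalent steps live on the class, and the commit wires the call into the non-streaming query path further down.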
@@ -105,7 +111,7 @@ class BSIChatbot:
         #Embedding, Vector generation and storing:
         self.embedding_model = HuggingFaceEmbeddings(
             model_name=self.word_and_embed_model_path,
-            multi_process=True,
+            multi_process=False,
             model_kwargs={"device": "cuda"},
             encode_kwargs={"normalize_embeddings": True}, # Set `True` for cosine similarity
         )
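Setting multi_process=False keeps embedding in the calling process, so sentence-transformers never spawns the worker pool whose handles would otherwise need cleanup. A brief sketch of the single-process setup, assuming the langchain_community import path and using a placeholder model name where app.py passes self.word_and_embed_model_path:

from langchain_community.embeddings import HuggingFaceEmbeddings

embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",  # placeholder model
    multi_process=False,  # encode in this process, no worker pool to tear down
    model_kwargs={"device": "cuda"},
    encode_kwargs={"normalize_embeddings": True},  # unit-length vectors for cosine similarity
)

vectors = embedding_model.embed_documents(["BSI IT-Grundschutz example text"])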
@@ -297,30 +303,6 @@ class BSIChatbot:
         #print(all_documents)
         return all_documents
 
-    def initializeLLM(self):
-        bnb_config = BitsAndBytesConfig(
-            load_in_8bit=True,
-            #bnb_8bit_use_double_quant=True,
-            #bnb_8bit_quant_type="nf4",
-            #bnb_8bit_compute_dtype=torch.bfloat16,
-        )
-        llm = AutoModelForCausalLM.from_pretrained(
-            self.llm_path, quantization_config=bnb_config
-        )
-        self.llmtokenizer = AutoTokenizer.from_pretrained(self.llm_path)
-        self.streamer=TextIteratorStreamer(self.llmtokenizer, skip_prompt=True)
-        self.llmpipeline = pipeline(
-            model=llm,
-            tokenizer=self.llmtokenizer,
-            task="text-generation",
-            do_sample=True,
-            temperature=0.7,
-            repetition_penalty=1.1,
-            return_full_text=False,
-            streamer=self.streamer,
-            max_new_tokens=500,
-        )
-
     def queryLLM(self,query):
         #resp = self.llmpipeline(chat) Fixen
         return(self.llmpipeline(query)[0]["generated_text"])
@@ -506,185 +488,9 @@ class BSIChatbot:
             messages=final_prompt,
             model=self.llm_remote_model,
             stream=False)
+        self.cleanResources()
         return answer, context
 
-    #@spaces.GPU
-    def ragPromptRemote(self, query, rerankingStep, history, stepBackPrompt):
-        global rerankingModel
-        prompt_in_chat_format = [
-            {
-                "role": "system",
-                "content": """You are an helpful Chatbot for the BSI IT-Grundschutz. Using the information contained in the context,
-        give a comprehensive answer to the question.
-        Respond only to the question asked, response should be concise and relevant but also give some context to the question.
-        Provide the source document when relevant for the understanding.
-        If the answer cannot be deduced from the context, do not give an answer.""",
-            },
-            {
-                "role": "user",
-                "content": """Context:
-        {context}
-        ---
-        Chat-History:
-        {history}
-        ---
-        Now here is the question you need to answer.
-
-        Question: {question}""",
-            },
-        ]
-        #RAG_PROMPT_TEMPLATE = self.llmtokenizer.apply_chat_template(
-        #    prompt_in_chat_format, tokenize=False, add_generation_prompt=True
-        #)
-        retrieved_chunks = self.retrieveSimiliarEmbedding(query)
-        retrieved_chunks_text = []
-        #TODO Irgendwas stimmt hier mit den Listen nicht
-        for chunk in retrieved_chunks:
-            #TODO Hier noch was smarteres Überlegen für alle Header
-            if "Header 1" in chunk.metadata.keys():
-                retrieved_chunks_text.append(f"The Document is: '{chunk.metadata['source']}'\nHeader of the Section is: '{chunk.metadata['Header 1']}' and Content of it:{chunk.page_content}")
-            else:
-                retrieved_chunks_text.append(
-                    f"The Document is: '{chunk.metadata['source']}'\nImage Description is: ':{chunk.page_content}")
-        i=1
-        #newfor chunk in retrieved_chunks_text:
-            #newprint(f"Retrieved Chunk number {i}:\n{chunk}")
-            #newi=i+1
-
-        if rerankingStep==True:
-            if rerankingModel == None:
-                print ("initializing Reranker-Model..")
-                self.initializeRerankingModel()
-            print("Starting Reranking Chunks...")
-            rerankingModel
-            retrieved_chunks_text=rerankingModel.rerank(query, retrieved_chunks_text,k=5)
-            retrieved_chunks_text=[chunk["content"] for chunk in retrieved_chunks_text]
-
-        i = 1
-        #newfor chunk in retrieved_chunks_text:
-            #newprint(f"Reranked Chunk number {i}:\n{chunk}")
-            #newi = i + 1
-
-        context = "\nExtracted documents:\n"
-        context += "".join([doc for i, doc in enumerate(retrieved_chunks_text)])
-        #Alles außer letzte Useranfrage
-        prompt_in_chat_format[-1]["content"] = prompt_in_chat_format[-1]["content"].format(
-            question=query, context=context, history=history[:-1]
-        )
-        final_prompt = prompt_in_chat_format
-        #final_prompt = prompt_in_chat_format[-1]["content"].format(
-        #    question=query, context=context, history=history[:-1]
-        #)
-
-        print(f"Query:\n{final_prompt}")
-        pattern = r"Filename:(.*?);"
-        last_value = final_prompt[-1]["content"]
-
-
-        match = re.findall(pattern, last_value)
-        self.images=match
-
-        stream = self.llm_client.chat.completions.create(
-            messages=final_prompt,
-            model=self.llm_remote_model,
-            stream=True
-        )
-        return stream
-        #generation_thread = threading.Thread(target=self.llmpipeline, args=(final_prompt,))
-        #generation_thread.start()
-
-        #return self.streamer
-
-        #answer=self.queryLLM(final_prompt)
-        #answer = self.llmpipeline(final_prompt)
-        #for token in answer:
-        #    print (token["generated_text"])
-        #    yield token["generated_text"]
-        #    gen = queryModel.stream(final_prompt)
-
-
-        #return gen
-
-        #print (f"Answer:\n{answer}")
-
-    def ragPrompt(self, query, rerankingStep, history):
-        global rerankingModel
-        prompt_in_chat_format = [
-            {
-                "role": "system",
-                "content": """You are an helpful Chatbot for the BSI IT-Grundschutz. Using the information contained in the context,
-        give a comprehensive answer to the question.
-        Respond only to the question asked, response should be concise and relevant but also give some context to the question.
-        Provide the source document when relevant for the understanding.
-        If the answer cannot be deduced from the context, do not give an answer.""",
-            },
-            {
-                "role": "user",
-                "content": """Context:
-        {context}
-        ---
-        Chat-History:
-        {history}
-        ---
-        Now here is the question you need to answer.
-
-        Question: {question}""",
-            },
-        ]
-        RAG_PROMPT_TEMPLATE = self.llmtokenizer.apply_chat_template(
-            prompt_in_chat_format, tokenize=False, add_generation_prompt=True
-        )
-        retrieved_chunks = self.retrieveSimiliarEmbedding(query)
-        retrieved_chunks_text = []
-        #TODO Irgendwas stimmt hier mit den Listen nicht
-        for chunk in retrieved_chunks:
-            #TODO Hier noch was smarteres Überlegen für alle Header
-            if "Header 1" in chunk.metadata.keys():
-                retrieved_chunks_text.append(f"The Document is: '{chunk.metadata['source']}'\nHeader of the Section is: '{chunk.metadata['Header 1']}' and Content of it:{chunk.page_content}")
-            else:
-                retrieved_chunks_text.append(
-                    f"The Document is: '{chunk.metadata['source']}'\nImage Description is: ':{chunk.page_content}")
-        i=1
-        for chunk in retrieved_chunks_text:
-            #newprint(f"Retrieved Chunk number {i}:\n{chunk}")
-            i=i+1
-
-        if rerankingStep==True:
-            if rerankingModel == None:
-                print ("initializing Reranker-Model..")
-                self.initializeRerankingModel()
-            print("Starting Reranking Chunks...")
-            rerankingModel
-            retrieved_chunks_text=rerankingModel.rerank(query, retrieved_chunks_text,k=15)
-            #REVERSE Rerank results!
-            #newprint("DBG: Rankorder:")
-            #newfor chunk in reversed(retrieved_chunks_text):
-                #newprint(chunk.rank)
-            retrieved_chunks_text=[chunk["content"] for chunk in reversed(retrieved_chunks_text)]
-
-        i = 1
-        for chunk in retrieved_chunks_text:
-            print(f"Reranked Chunk number {i}:\n{chunk}")
-            i = i + 1
-
-        context = "\nExtracted documents:\n"
-        context += "".join([doc for i, doc in enumerate(retrieved_chunks_text)])
-        #Alles außer letzte Useranfrage
-        final_prompt = RAG_PROMPT_TEMPLATE.format(
-            question=query, context=context, history=history[:-1]
-        )
-
-        print(f"Query:\n{final_prompt}")
-        pattern = r"Filename:(.*?);"
-        match = re.findall(pattern, final_prompt)
-        self.images=match
-
-        #queryModel = HuggingFacePipeline(pipeline = self.llmpipeline)
-        generation_thread = threading.Thread(target=self.llmpipeline, args=(final_prompt,))
-        generation_thread.start()
-
-        return self.streamer
-
     def returnImages(self):
         imageList = []
         for image in self.images:
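With the local pipeline methods gone, the non-streaming remote path is the one that now calls cleanResources before returning. The sketch below shows that call shape with the OpenAI-compatible client; the answer_non_streaming helper is hypothetical, and the try/finally is an assumption of the sketch rather than something the commit adds.

from openai import OpenAI


def answer_non_streaming(client: OpenAI, model: str, final_prompt: list, cleanup) -> str:
    # Blocking chat completion followed by the cleanup hook; try/finally makes
    # sure GPU cache and worker handles are released even if the request raises.
    try:
        completion = client.chat.completions.create(
            messages=final_prompt,
            model=model,
            stream=False,
        )
        return completion.choices[0].message.content
    finally:
        cleanup()

Inside the class this would be invoked roughly as answer_non_streaming(self.llm_client, self.llm_remote_model, final_prompt, self.cleanResources).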
 