added EvalDataset Generation
app.py CHANGED
@@ -21,6 +21,7 @@ import re
 import csv
 import json
 import gc
+import multiprocessing
 
 from openai import OpenAI
 from langchain.text_splitter import RecursiveCharacterTextSplitter
@@ -67,7 +68,6 @@ class BSIChatbot:
     llmpipeline = None
     llmtokenizer = None
     vectorstore = None
-    streamer = None
     images = [None]
 
     # model_paths = {
@@ -94,6 +94,12 @@ class BSIChatbot:
         self.embedding_model = None
         #self.vectorstore: VectorStore = None
 
+    def cleanResources(self):
+        multiprocessing.active_children()
+        multiprocessing.resource_tracker.unregister('Semaphore')
+        torch.cuda.empty_cache()
+        gc.collect()
+
     def initializeEmbeddingModel(self, new_embedding):
         global vectorstore
         RAW_KNOWLEDGE_BASE = []
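The new cleanResources() helper combines process reaping, CUDA cache release, and garbage collection. Below is a minimal standalone sketch of that pattern, assuming torch is installed; note that CPython's multiprocessing.resource_tracker.unregister expects both a name and a resource type (e.g. "semaphore"), so that call is left out of the sketch rather than guessed at.

import gc
import multiprocessing

import torch


def clean_resources():
    # Reap finished worker processes so their OS resources are released.
    multiprocessing.active_children()
    # Hand cached GPU memory back to the CUDA driver (skipped on CPU-only hosts).
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    # Force a garbage-collection pass for any lingering Python references.
    gc.collect()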
@@ -105,7 +111,7 @@ class BSIChatbot:
         #Embedding, Vector generation and storing:
         self.embedding_model = HuggingFaceEmbeddings(
             model_name=self.word_and_embed_model_path,
-            multi_process=
+            multi_process=False,
             model_kwargs={"device": "cuda"},
             encode_kwargs={"normalize_embeddings": True}, # Set `True` for cosine similarity
         )
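For reference, a minimal sketch of the single-process embedding setup this hunk settles on; the import path and model name are placeholders, not values taken from this repository.

from langchain_community.embeddings import HuggingFaceEmbeddings

embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",  # placeholder; app.py uses self.word_and_embed_model_path
    multi_process=False,  # encode in the main process, so no worker pool is left behind
    model_kwargs={"device": "cuda"},
    encode_kwargs={"normalize_embeddings": True},  # unit-length vectors for cosine similarity
)
vectors = embedding_model.embed_documents(["example IT-Grundschutz chunk"])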
@@ -297,30 +303,6 @@ class BSIChatbot:
         #print(all_documents)
         return all_documents
 
-    def initializeLLM(self):
-        bnb_config = BitsAndBytesConfig(
-            load_in_8bit=True,
-            #bnb_8bit_use_double_quant=True,
-            #bnb_8bit_quant_type="nf4",
-            #bnb_8bit_compute_dtype=torch.bfloat16,
-        )
-        llm = AutoModelForCausalLM.from_pretrained(
-            self.llm_path, quantization_config=bnb_config
-        )
-        self.llmtokenizer = AutoTokenizer.from_pretrained(self.llm_path)
-        self.streamer=TextIteratorStreamer(self.llmtokenizer, skip_prompt=True)
-        self.llmpipeline = pipeline(
-            model=llm,
-            tokenizer=self.llmtokenizer,
-            task="text-generation",
-            do_sample=True,
-            temperature=0.7,
-            repetition_penalty=1.1,
-            return_full_text=False,
-            streamer=self.streamer,
-            max_new_tokens=500,
-        )
-
     def queryLLM(self,query):
         #resp = self.llmpipeline(chat) Fixen
         return(self.llmpipeline(query)[0]["generated_text"])
@@ -506,185 +488,9 @@ class BSIChatbot:
             messages=final_prompt,
             model=self.llm_remote_model,
             stream=False)
+        self.cleanResources()
         return answer, context
 
-    #@spaces.GPU
-    def ragPromptRemote(self, query, rerankingStep, history, stepBackPrompt):
-        global rerankingModel
-        prompt_in_chat_format = [
-            {
-                "role": "system",
-                "content": """You are an helpful Chatbot for the BSI IT-Grundschutz. Using the information contained in the context,
-give a comprehensive answer to the question.
-Respond only to the question asked, response should be concise and relevant but also give some context to the question.
-Provide the source document when relevant for the understanding.
-If the answer cannot be deduced from the context, do not give an answer.""",
-            },
-            {
-                "role": "user",
-                "content": """Context:
-{context}
----
-Chat-History:
-{history}
----
-Now here is the question you need to answer.
-
-Question: {question}""",
-            },
-        ]
-        #RAG_PROMPT_TEMPLATE = self.llmtokenizer.apply_chat_template(
-        #    prompt_in_chat_format, tokenize=False, add_generation_prompt=True
-        #)
-        retrieved_chunks = self.retrieveSimiliarEmbedding(query)
-        retrieved_chunks_text = []
-        #TODO Irgendwas stimmt hier mit den Listen nicht
-        for chunk in retrieved_chunks:
-            #TODO Hier noch was smarteres Überlegen für alle Header
-            if "Header 1" in chunk.metadata.keys():
-                retrieved_chunks_text.append(f"The Document is: '{chunk.metadata['source']}'\nHeader of the Section is: '{chunk.metadata['Header 1']}' and Content of it:{chunk.page_content}")
-            else:
-                retrieved_chunks_text.append(
-                    f"The Document is: '{chunk.metadata['source']}'\nImage Description is: ':{chunk.page_content}")
-        i=1
-        #newfor chunk in retrieved_chunks_text:
-        #newprint(f"Retrieved Chunk number {i}:\n{chunk}")
-        #newi=i+1
-
-        if rerankingStep==True:
-            if rerankingModel == None:
-                print ("initializing Reranker-Model..")
-                self.initializeRerankingModel()
-            print("Starting Reranking Chunks...")
-            rerankingModel
-            retrieved_chunks_text=rerankingModel.rerank(query, retrieved_chunks_text,k=5)
-            retrieved_chunks_text=[chunk["content"] for chunk in retrieved_chunks_text]
-
-            i = 1
-            #newfor chunk in retrieved_chunks_text:
-            #newprint(f"Reranked Chunk number {i}:\n{chunk}")
-            #newi = i + 1
-
-        context = "\nExtracted documents:\n"
-        context += "".join([doc for i, doc in enumerate(retrieved_chunks_text)])
-        #Alles außer letzte Useranfrage
-        prompt_in_chat_format[-1]["content"] = prompt_in_chat_format[-1]["content"].format(
-            question=query, context=context, history=history[:-1]
-        )
-        final_prompt = prompt_in_chat_format
-        #final_prompt = prompt_in_chat_format[-1]["content"].format(
-        #    question=query, context=context, history=history[:-1]
-        #)
-
-        print(f"Query:\n{final_prompt}")
-        pattern = r"Filename:(.*?);"
-        last_value = final_prompt[-1]["content"]
-
-
-        match = re.findall(pattern, last_value)
-        self.images=match
-
-        stream = self.llm_client.chat.completions.create(
-            messages=final_prompt,
-            model=self.llm_remote_model,
-            stream=True
-        )
-        return stream
-        #generation_thread = threading.Thread(target=self.llmpipeline, args=(final_prompt,))
-        #generation_thread.start()
-
-        #return self.streamer
-
-        #answer=self.queryLLM(final_prompt)
-        #answer = self.llmpipeline(final_prompt)
-        #for token in answer:
-        #    print (token["generated_text"])
-        #    yield token["generated_text"]
-        # gen = queryModel.stream(final_prompt)
-
-
-        #return gen
-
-        #print (f"Answer:\n{answer}")
-
-    def ragPrompt(self, query, rerankingStep, history):
-        global rerankingModel
-        prompt_in_chat_format = [
-            {
-                "role": "system",
-                "content": """You are an helpful Chatbot for the BSI IT-Grundschutz. Using the information contained in the context,
-give a comprehensive answer to the question.
-Respond only to the question asked, response should be concise and relevant but also give some context to the question.
-Provide the source document when relevant for the understanding.
-If the answer cannot be deduced from the context, do not give an answer.""",
-            },
-            {
-                "role": "user",
-                "content": """Context:
-{context}
----
-Chat-History:
-{history}
----
-Now here is the question you need to answer.
-
-Question: {question}""",
-            },
-        ]
-        RAG_PROMPT_TEMPLATE = self.llmtokenizer.apply_chat_template(
-            prompt_in_chat_format, tokenize=False, add_generation_prompt=True
-        )
-        retrieved_chunks = self.retrieveSimiliarEmbedding(query)
-        retrieved_chunks_text = []
-        #TODO Irgendwas stimmt hier mit den Listen nicht
-        for chunk in retrieved_chunks:
-            #TODO Hier noch was smarteres Überlegen für alle Header
-            if "Header 1" in chunk.metadata.keys():
-                retrieved_chunks_text.append(f"The Document is: '{chunk.metadata['source']}'\nHeader of the Section is: '{chunk.metadata['Header 1']}' and Content of it:{chunk.page_content}")
-            else:
-                retrieved_chunks_text.append(
-                    f"The Document is: '{chunk.metadata['source']}'\nImage Description is: ':{chunk.page_content}")
-        i=1
-        for chunk in retrieved_chunks_text:
-            #newprint(f"Retrieved Chunk number {i}:\n{chunk}")
-            i=i+1
-
-        if rerankingStep==True:
-            if rerankingModel == None:
-                print ("initializing Reranker-Model..")
-                self.initializeRerankingModel()
-            print("Starting Reranking Chunks...")
-            rerankingModel
-            retrieved_chunks_text=rerankingModel.rerank(query, retrieved_chunks_text,k=15)
-            #REVERSE Rerank results!
-            #newprint("DBG: Rankorder:")
-            #newfor chunk in reversed(retrieved_chunks_text):
-            #newprint(chunk.rank)
-            retrieved_chunks_text=[chunk["content"] for chunk in reversed(retrieved_chunks_text)]
-
-            i = 1
-            for chunk in retrieved_chunks_text:
-                print(f"Reranked Chunk number {i}:\n{chunk}")
-                i = i + 1
-
-        context = "\nExtracted documents:\n"
-        context += "".join([doc for i, doc in enumerate(retrieved_chunks_text)])
-        #Alles außer letzte Useranfrage
-        final_prompt = RAG_PROMPT_TEMPLATE.format(
-            question=query, context=context, history=history[:-1]
-        )
-
-        print(f"Query:\n{final_prompt}")
-        pattern = r"Filename:(.*?);"
-        match = re.findall(pattern, final_prompt)
-        self.images=match
-
-        #queryModel = HuggingFacePipeline(pipeline = self.llmpipeline)
-        generation_thread = threading.Thread(target=self.llmpipeline, args=(final_prompt,))
-        generation_thread.start()
-
-        return self.streamer
-
     def returnImages(self):
         imageList = []
         for image in self.images:
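A possible follow-up, not part of this commit: since cleanResources() now runs only after a successful non-streaming remote completion, a try/finally wrapper would release resources even when the request raises. The helper below is a hypothetical sketch; only cleanResources() is taken from app.py.

def call_with_cleanup(bot, make_request):
    # `bot` is assumed to be a BSIChatbot instance; `make_request` is any
    # zero-argument callable that performs the remote LLM call.
    try:
        return make_request()
    finally:
        bot.cleanResources()  # free GPU cache and collect garbage even on errors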