import os

# Hide CUDA from the main process; GPU work runs inside the @spaces.GPU-decorated call.
os.environ["CUDA_VISIBLE_DEVICES"] = ""

print(os.getcwd())

embedding_path = "/home/user/app/docs/_embeddings/index.faiss"
print(f"Loading FAISS index from: {embedding_path}")
if not os.path.exists(embedding_path):
    print("File not found!")

# Token for gated model repos, read from the 'Gated_Repo' environment variable.
HF_KEY = os.getenv('Gated_Repo')

import time
import re
import threading

import torch
import gradio as gr
import spaces

import faiss  # backend used by the LangChain FAISS vector store
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.docstore.document import Document as LangchainDocument
from langchain_text_splitters import MarkdownHeaderTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.vectorstores.utils import DistanceStrategy
from langchain_huggingface import HuggingFaceEmbeddings
from ragatouille import RAGPretrainedModel
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    TextIteratorStreamer,
    pipeline,
)
from huggingface_hub import login

login(token=HF_KEY)
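

# Shared module-level handles for the Gradio callbacks: the FAISS store and the
# ColBERT reranker are created once and then reused by every request.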
vectorstore = None
rerankingModel = None


class BSIChatbot:
    # One instance bundles the embedding model, the quantized LLM pipeline, its
    # token streamer and the image filenames extracted from the latest prompt.
    embedding_model = None
    llmpipeline = None
    llmtokenizer = None
    vectorstore = None
    streamer = None
    images = [None]

    # Model and data locations.
    llm_path = "meta-llama/Llama-3.2-3B-Instruct"
    word_and_embed_model_path = "intfloat/multilingual-e5-large-instruct"
    docs = "/home/user/app/docs"
    rerankModelPath = "AdrienB134/ColBERTv1.0-german-mmarcoDE"
    embedPath = "/home/user/app/docs/_embeddings"

    def __init__(self):
        self.embedding_model = None
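
    # Build the multilingual-e5 embedding wrapper, then either re-embed the docs
    # folder (new_embedding=True) or load the previously saved FAISS index.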
    def initializeEmbeddingModel(self, new_embedding):
        global vectorstore
        RAW_KNOWLEDGE_BASE = []

        self.embedding_model = HuggingFaceEmbeddings(
            model_name=self.word_and_embed_model_path,
            multi_process=True,
            model_kwargs={"device": "cuda"},
            encode_kwargs={"normalize_embeddings": True},
        )

        dirList = os.listdir(self.docs)

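        # Rebuild path: .md files are read as UTF-8; .txt files hold image
        # descriptions (cp1252) and are attributed to their matching .png/.jpg
        # image so a retrieved description can point back to the image file.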
        if new_embedding:
            for doc in dirList:
                print(doc)
                if doc.endswith(".md"):
                    with open(os.path.join(self.docs, doc), 'r', encoding='utf-8') as file:
                        doctxt = file.read()
                    RAW_KNOWLEDGE_BASE.append(LangchainDocument(page_content=doctxt, metadata={"source": doc}))
                if doc.endswith(".txt"):
                    with open(os.path.join(self.docs, doc), 'r', encoding='cp1252') as file:
                        doctxt = file.read()
                    if doc.replace(".txt", ".png") in dirList:
                        RAW_KNOWLEDGE_BASE.append(LangchainDocument(page_content=doctxt, metadata={"source": doc.replace(".txt", ".png")}))
                    if doc.replace(".txt", ".jpg") in dirList:
                        RAW_KNOWLEDGE_BASE.append(LangchainDocument(page_content=doctxt, metadata={"source": doc.replace(".txt", ".jpg")}))

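            # Two-stage splitting: first along markdown headers, then a
            # token-aware recursive splitter so every chunk fits within the
            # 512-token input window of the embedding model.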
            headers_to_split_on = [
                ("#", "Header 1"),
                ("##", "Header 2"),
                ("###", "Header 3"),
                ("####", "Header 4"),
                ("#####", "Header 5"),
            ]

            markdown_splitter = MarkdownHeaderTextSplitter(
                headers_to_split_on=headers_to_split_on,
                strip_headers=True
            )

            tokenizer = AutoTokenizer.from_pretrained(self.word_and_embed_model_path)

            text_splitter = RecursiveCharacterTextSplitter.from_huggingface_tokenizer(
                tokenizer=tokenizer,
                chunk_size=512,
                chunk_overlap=0,
                add_start_index=True,
                strip_whitespace=True,
            )

            docs_processed = []
            for doc in RAW_KNOWLEDGE_BASE:
                print(f"Word-Length in doc: {len(doc.page_content.split())}")
                doc_cache = markdown_splitter.split_text(doc.page_content)
                doc_cache = text_splitter.split_documents(doc_cache)

                # The markdown splitter drops the source, so re-attach it to every chunk.
                for chunk in doc_cache:
                    chunk.metadata.update({"source": doc.metadata['source']})
                    print(f"Chunk_Debug len: {len(chunk.page_content.split())} and Chunk: {chunk}")

                docs_processed += doc_cache

            print(f"Docs processed: {len(docs_processed)}")

            lengths = [len(s.page_content) for s in docs_processed]
            print(max(lengths))

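            # Embed every chunk into a cosine-similarity FAISS index and persist
            # it, so later runs can load it instead of re-embedding everything.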
            start = time.time()

            vectorstore = FAISS.from_documents(docs_processed, self.embedding_model, distance_strategy=DistanceStrategy.COSINE)

            vectorstore.save_local(self.embedPath)

            end = time.time()
            print("Saving Embeddings took", end - start, "seconds!")
        else:
            start = time.time()
            vectorstore = FAISS.load_local(self.embedPath, self.embedding_model, allow_dangerous_deserialization=True)
            end = time.time()
            print("Loading Embeddings took", end - start, "seconds!")
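
    # Dense retrieval: prepend the e5-instruct query prefix and return the 20
    # most similar chunks from the FAISS index (optionally reranked later).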
    def retrieveSimilarEmbedding(self, query):
        global vectorstore
        print("Retrieving Embeddings...")
        start = time.time()
        query = f"Instruct: Given a search query, retrieve the relevant passages that answer the query\nQuery:{query}"

        retrieved_chunks = vectorstore.similarity_search(query=query, k=20)

        end = time.time()
        print("Retrieving chunks with similar embeddings took", end - start, "seconds!")

        print(f"Printing first chunk to see what's inside: {retrieved_chunks[0]}")
        return retrieved_chunks
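
    # Load Llama 3.2 3B Instruct in 8-bit (bitsandbytes) and attach a
    # TextIteratorStreamer so generated tokens can be streamed to the UI.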
    def initializeLLM(self):
        bnb_config = BitsAndBytesConfig(
            load_in_8bit=True,
        )
        llm = AutoModelForCausalLM.from_pretrained(
            self.llm_path, quantization_config=bnb_config
        )
        self.llmtokenizer = AutoTokenizer.from_pretrained(self.llm_path)
        self.streamer = TextIteratorStreamer(self.llmtokenizer, skip_prompt=True)
        self.llmpipeline = pipeline(
            model=llm,
            tokenizer=self.llmtokenizer,
            task="text-generation",
            do_sample=True,
            temperature=0.7,
            repetition_penalty=1.1,
            return_full_text=False,
            streamer=self.streamer,
            max_new_tokens=500,
        )
    def queryLLM(self, query):
        # Non-streaming convenience call; the Gradio path consumes the streamer instead.
        return self.llmpipeline(query)[0]["generated_text"]

    def initializeRerankingModel(self):
        global rerankingModel
        rerankingModel = RAGPretrainedModel.from_pretrained(self.rerankModelPath)
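
    # Full RAG round trip: retrieve chunks, optionally rerank them with the
    # ColBERT model, fill the chat template with context and history, then
    # generate on a background thread and return the token streamer.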
    @spaces.GPU
    def ragPrompt(self, query, rerankingStep, history):
        prompt_in_chat_format = [
            {
                "role": "system",
                "content": """You are a helpful chatbot for the BSI IT-Grundschutz. Using the information contained in the context,
give a comprehensive answer to the question.
Respond only to the question asked; the response should be concise and relevant but should also give some context to the question.
Provide the source document when it is relevant for the understanding.
If the answer cannot be deduced from the context, do not give an answer.""",
            },
            {
                "role": "user",
                "content": """Context:
{context}
---
Chat-History:
{history}
---
Now here is the question you need to answer.

Question: {question}""",
            },
        ]
        RAG_PROMPT_TEMPLATE = self.llmtokenizer.apply_chat_template(
            prompt_in_chat_format, tokenize=False, add_generation_prompt=True
        )
        retrieved_chunks = self.retrieveSimilarEmbedding(query)
        retrieved_chunks_text = []

        for chunk in retrieved_chunks:
            if "Header 1" in chunk.metadata.keys():
                retrieved_chunks_text.append(f"The Document is: '{chunk.metadata['source']}'\nHeader of the Section is: '{chunk.metadata['Header 1']}' and Content of it: {chunk.page_content}")
            else:
                retrieved_chunks_text.append(
                    f"The Document is: '{chunk.metadata['source']}'\nImage Description is: '{chunk.page_content}'")
        i = 1
        for chunk in retrieved_chunks_text:
            print(f"Retrieved Chunk number {i}:\n{chunk}")
            i = i + 1

        if rerankingStep:
            if rerankingModel is None:
                print("Initializing Reranker-Model..")
                self.initializeRerankingModel()
            print("Starting Reranking Chunks...")
            retrieved_chunks_text = rerankingModel.rerank(query, retrieved_chunks_text, k=5)
            retrieved_chunks_text = [chunk["content"] for chunk in retrieved_chunks_text]

            i = 1
            for chunk in retrieved_chunks_text:
                print(f"Reranked Chunk number {i}:\n{chunk}")
                i = i + 1

        context = "\nExtracted documents:\n"
        context += "".join(retrieved_chunks_text)

        final_prompt = RAG_PROMPT_TEMPLATE.format(
            question=query, context=context, history=history[:-1]
        )

        print(f"Query:\n{final_prompt}")
        # Collect image filenames referenced in the prompt ("Filename:<name>;") for the gallery.
        pattern = r"Filename:(.*?);"
        match = re.findall(pattern, final_prompt)
        self.images = match

        # Run generation in the background; the streamer yields tokens as they arrive.
        generation_thread = threading.Thread(target=self.llmpipeline, args=(final_prompt,))
        generation_thread.start()

        return self.streamer
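
    # Resolve the filenames collected by ragPrompt into paths inside the docs folder.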
    def returnImages(self):
        imageList = []
        for image in self.images:
            imageList.append(os.path.join(self.docs, image))
        return imageList

    def launchGr(self):
        gr.Interface.from_pipeline(self.llmpipeline).launch()


if __name__ == '__main__':
    renewEmbeddings = False
    reranking = True
    bot = BSIChatbot()
    bot.initializeEmbeddingModel(renewEmbeddings)
    if reranking:
        bot.initializeRerankingModel()
    bot.initializeLLM()

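    # Gradio UI: a chat column next to an image gallery; the assistant answer is
    # streamed and the gallery shows images referenced by the retrieved chunks.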
    with gr.Blocks() as demo:
        with gr.Row() as row:
            with gr.Column(scale=3):
                chatbot = gr.Chatbot(type="messages")
                msg = gr.Textbox()
                clear = gr.Button("Clear")
                reset = gr.Button("Reset")
            with gr.Column(scale=1):
                gallery = gr.Gallery(label="Bildergalerie", elem_id="gallery")

        def user(user_message, history: list):
            # Append the user turn to the history and clear the textbox.
            return "", history + [{"role": "user", "content": user_message}]

        def returnImages():
            image_paths = bot.returnImages()
            print(f"Returning images: {image_paths}")
            return image_paths

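        # Streaming bot callback: append an empty assistant message, then yield
        # the growing history token by token together with the image gallery.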
        def gradiobot(history: list):
            start = time.time()
            print(f"ragQuery hist -1: {history[-1].get('content')}")
            print(f"ragQuery hist 0: {history[0].get('content')}")
            print(f"fullHistory: {history}")
            bot_response = bot.ragPrompt(history[-1].get('content'), reranking, history)
            history.append({"role": "assistant", "content": ""})

            image_gallery = returnImages()

            for token in bot_response:
                # Strip the end-of-turn marker and start list items on their own line.
                if "eot_id" in token:
                    token = token.replace("<|eot_id|>", "")
                if token.startswith("-"):
                    token = f"\n{token}"
                if re.match(r"^[1-9]\.", token):
                    token = f"\n{token}"

                history[-1]['content'] += token
                yield history, image_gallery
            end = time.time()
            print("End2End Query took", end - start, "seconds!")

        def resetHistory():
            return []

        msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
            gradiobot, inputs=[chatbot], outputs=[chatbot, gallery]
        )

        clear.click(lambda: None, None, chatbot, queue=False)
        reset.click(resetHistory, outputs=chatbot, queue=False)

    demo.css = """
    #gallery {
        display: grid;
        grid-template-columns: repeat(2, 1fr);
        gap: 10px;
        height: 400px;
        overflow: auto;
    }
    """
    demo.launch(allowed_paths=["/home/user/app/docs"])