app.py
ADDED
@@ -0,0 +1,484 @@
import os
os.environ["CUDA_VISIBLE_DEVICES"] = ""  # Disable CUDA initialization
os.environ["allow_dangerous_deserialization"] = "True"
print(os.getcwd())
embedding_path = "/home/user/app/docs/_embeddings/index.faiss"
print(f"Loading FAISS index from: {embedding_path}")
if not os.path.exists(embedding_path):
    print("File not found!")
HF_KEY = os.getenv('Gated_Repo')

import spaces
import time
import asyncio

import torch
import gradio as gr
import threading
import re

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.docstore import InMemoryDocstore
from langchain_community.document_loaders import TextLoader
from langchain.docstore.document import Document as LangchainDocument
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
from langchain_core.vectorstores import VectorStore
from ragatouille import RAGPretrainedModel

from langchain_text_splitters import MarkdownHeaderTextSplitter, CharacterTextSplitter
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TextIteratorStreamer
from transformers import pipeline

#DEPR: from langchain.vectorstores import FAISS
import faiss
from langchain_community.vectorstores import FAISS
#DEPR: from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores.utils import DistanceStrategy
from huggingface_hub import login

# Press Shift+F10 to execute it or replace it with your code.
# Press Double Shift to search everywhere for classes, files, tool windows, actions, and settings.

login(token=HF_KEY)

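# ---------------------------------------------------------------------------
# RAG chatbot for the BSI IT-Grundschutz: markdown/text documents under `docs`
# are chunked, embedded with multilingual-e5-large-instruct and indexed in
# FAISS; retrieved chunks are optionally reranked with a German ColBERT model
# and passed as context to Llama-3.2-3B-Instruct, streamed into a Gradio UI.
# ---------------------------------------------------------------------------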
vectorstore = None
rerankingModel = None

class BSIChatbot:
    embedding_model = None
    llmpipeline = None
    llmtokenizer = None
    vectorstore = None
    streamer = None
    images = [None]

    # model_paths = {
    #     'llm_path': 'meta-llama/Llama-3.2-3B-Instruct',
    #     'embed_model_path': 'intfloat/multilingual-e5-large-instruct',
    #     'rerank_model_path': 'domci/ColBERTv2-mmarco-de-0.1'
    # }

    llm_path = "meta-llama/Llama-3.2-3B-Instruct"
    word_and_embed_model_path = "intfloat/multilingual-e5-large-instruct"
    docs = "/home/user/app/docs"
    #docs = "H:\\Uni\\Master\\Masterarbeit\\Masterarbeit\\daten\\_parsed_embed_test"
    rerankModelPath = "AdrienB134/ColBERTv1.0-german-mmarcoDE"
    embedPath = "/home/user/app/docs/_embeddings"

    def __init__(self):
        self.embedding_model = None
        #self.vectorstore: VectorStore = None

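    # initializeEmbeddingModel: builds or loads the FAISS vector store. With
    # new_embedding=True the .md/.txt files in `docs` are read, split on
    # markdown headers, re-split to <=512 tokens with the e5 tokenizer,
    # embedded and saved to `embedPath`; otherwise the saved index is loaded.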
    def initializeEmbeddingModel(self, new_embedding):
        global vectorstore
        RAW_KNOWLEDGE_BASE = []

        # Embedding, vector generation and storing:
        self.embedding_model = HuggingFaceEmbeddings(
            model_name=self.word_and_embed_model_path,
            multi_process=True,
            model_kwargs={"device": "cuda"},
            encode_kwargs={"normalize_embeddings": True},  # Set `True` for cosine similarity
        )

        #index_cpu = faiss.IndexFlatL2(1024)
        #res = faiss.StandardGpuResources()
        #index_gpu = faiss.index_cpu_to_gpu(res, 0, index_cpu)
        dirList = os.listdir(self.docs)
        if new_embedding:
            for doc in dirList:
                print(doc)
                if ".md" in doc:
                    ##doctxt = TextLoader(os.path.join(self.docs, doc)).load()
                    with open(os.path.join(self.docs, doc), 'r', encoding='utf-8') as file:
                        doctxt = file.read()
                    RAW_KNOWLEDGE_BASE.append(LangchainDocument(page_content=doctxt, metadata={"source": doc}))
                if ".txt" in doc:
                    with open(os.path.join(self.docs, doc), 'r', encoding='cp1252') as file:
                        doctxt = file.read()
                    if doc.replace(".txt", ".png") in dirList:
                        RAW_KNOWLEDGE_BASE.append(LangchainDocument(page_content=doctxt, metadata={"source": doc.replace(".txt", ".png")}))
                    if doc.replace(".txt", ".jpg") in dirList:
                        RAW_KNOWLEDGE_BASE.append(LangchainDocument(page_content=doctxt, metadata={"source": doc.replace(".txt", ".jpg")}))

            # RAW_KNOWLEDGE_BASE.append(txtLoader)
            # print(RAW_KNOWLEDGE_BASE)

            # Chunking starts here

            headers_to_split_on = [
                ("#", "Header 1"),
                ("##", "Header 2"),
                ("###", "Header 3"),
                ("####", "Header 4"),
                ("#####", "Header 5"),
            ]

            markdown_splitter = MarkdownHeaderTextSplitter(
                headers_to_split_on=headers_to_split_on,
                strip_headers=True
            )

            tokenizer = AutoTokenizer.from_pretrained(self.word_and_embed_model_path)

            text_splitter = RecursiveCharacterTextSplitter.from_huggingface_tokenizer(
                tokenizer=tokenizer,
                chunk_size=512,  # The maximum number of tokens in a chunk
                chunk_overlap=0,  # The number of tokens to overlap between chunks
                add_start_index=True,  # If `True`, includes chunk's start index in metadata
                strip_whitespace=True,  # If `True`, strips whitespace from the start and end of every document
            )

            ## TODO: work out what to do with the start index and how to add metadata
            docs_processed = []
            for doc in RAW_KNOWLEDGE_BASE:
                print(f"Word-Length in doc:{len(doc.page_content.split())}")
                doc_cache = markdown_splitter.split_text(doc.page_content)
                # print(f"Word-Length in doc_cache after MarkdownSplitter:{len(doc_cache.split())}")
                doc_cache = text_splitter.split_documents(doc_cache)
                # print(f"Word-Length in doc_cache after text_splitter:{len(doc_cache.split())}")
                for chunk in doc_cache:
                    chunk.metadata.update({"source": doc.metadata['source']})
                    print(f"Chunk_Debug len: {len(chunk.page_content.split())} and Chunk:{chunk}")
                # DEBUG:
                # print(f"doc_cache after Metadata added:{doc_cache}\n")
                docs_processed += doc_cache

            #final_docs = []
            #for doc in docs_processed:
            #    final_docs += text_splitter.split_documents([doc])

            #docs_processed = final_docs

            ## Old approach from here on:
            # MARKDOWN_SEPARATORS = [
            #     "\n\n",
            #     "---"
            #     "\n",
            #     " ",
            #     ""
            # ]

            #text_splitter = RecursiveCharacterTextSplitter(
            #    chunk_size=512,  # The maximum number of characters in a chunk
            #    chunk_overlap=100,  # The number of characters to overlap between chunks
            #    add_start_index=True,  # If `True`, includes chunk's start index in metadata
            #    strip_whitespace=True,  # If `True`, strips whitespace from the start and end of every document
            #    separators=MARKDOWN_SEPARATORS,
            #)

            #docs_processed = []
            #for doc in RAW_KNOWLEDGE_BASE:
            #    docs_processed += text_splitter.split_documents([doc])

            print(f"Docs processed:{len(docs_processed)}")
            # Max_Sequence_Length of e5-large-instruct = 512 tokens

            # Make sure the maximum chunk length stays below the embedding model's input size
            lengths = [len(s.page_content) for s in docs_processed]
            print(max(lengths))

            #for l in docs_processed:
            #    print(f"Char-Length:{len(l.page_content.split())}")
            #    print(f"Tokenizer Length: {len(tokenizer.tokenize(l.page_content))}")

            #if (max(lengths) > SentenceTransformer(self.word_and_embed_model_path).max_seq_length):
            #    print(
            #        f'Error: Fit chunking size into embedding model. Chunk {max(lengths)} is bigger than {SentenceTransformer(self.word_and_embed_model_path).max_seq_length}')

            start = time.time()
            #docstore = InMemoryDocstore({str(i): doc for i, doc in enumerate(docs_processed)})
            #index_to_docstore_id = {i: str(i) for i in range(len(docs_processed))}
            vectorstore = FAISS.from_documents(docs_processed, self.embedding_model, distance_strategy=DistanceStrategy.COSINE)
            #self.vectorstore = FAISS(
            #    embedding_function=self.embedding_model,
            #    index=index_gpu,
            #    distance_strategy=DistanceStrategy.COSINE,
            #    docstore=docstore,
            #    index_to_docstore_id=index_to_docstore_id
            #)
            #self.vectorstore.from_documents(docs_processed, self.embedding_model)
            #index_cpu = faiss.index_gpu_to_cpu(self.vectorstore.index)
            #self.vectorstore.index = index_cpu
            vectorstore.save_local(self.embedPath)
            #self.vectorstore.index = index_gpu
            end = time.time()
            print("Saving Embeddings took", end - start, "seconds!")
        else:
            start = time.time()
            vectorstore = FAISS.load_local(self.embedPath, self.embedding_model, allow_dangerous_deserialization=True)
            #self.vectorstore.index = index_gpu
            end = time.time()

            print("Loading Embeddings took", end - start, "seconds!")

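    # retrieveSimiliarEmbedding: dense retrieval step. The query is wrapped in
    # the e5-instruct "Instruct: ... Query:" format and the 20 nearest chunks
    # are returned from the FAISS index.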
    def retrieveSimiliarEmbedding(self, query):
        global vectorstore
        print("Retrieving Embeddings...")
        start = time.time()
        query = f"Instruct: Given a search query, retrieve the relevant passages that answer the query\nQuery:{query}"

        #self.vectorstore.
        #retrieved_chunks = self.vectorstore.similarity_search(query=query, k=20)
        retrieved_chunks = vectorstore.similarity_search(query=query, k=20)
        #finalchunks = []
        #for chunk in retrieved_chunks:
        #    if "---" not in chunk.page_content:
        #        finalchunks.append(chunk)
        #retrieved_chunks = finalchunks
        end = time.time()
        print("Retrieving chunks with similar embeddings took", end - start, "seconds!")
        #print("\n==================================Top document==================================")
        #print(retrieved_chunks[0].page_content)
        #print(retrieved_chunks[1].page_content)
        #print(retrieved_chunks[2].page_content)
        #print("==================================Metadata==================================")
        #print(retrieved_chunks[0].metadata)
        #print(retrieved_chunks[1].metadata)
        #print(retrieved_chunks[2].metadata)
        print(f"Printing the first chunk to see what is inside: {retrieved_chunks[0]}")
        return retrieved_chunks

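    # initializeLLM: loads Llama-3.2-3B-Instruct with 8-bit quantization and
    # wires a TextIteratorStreamer into the text-generation pipeline so that
    # ragPrompt can stream tokens while generation runs in a background thread.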
    def initializeLLM(self):
        bnb_config = BitsAndBytesConfig(
            load_in_8bit=True,
            #bnb_8bit_use_double_quant=True,
            #bnb_8bit_quant_type="nf4",
            #bnb_8bit_compute_dtype=torch.bfloat16,
        )
        llm = AutoModelForCausalLM.from_pretrained(
            self.llm_path, quantization_config=bnb_config
        )
        self.llmtokenizer = AutoTokenizer.from_pretrained(self.llm_path)
        self.streamer = TextIteratorStreamer(self.llmtokenizer, skip_prompt=True)
        self.llmpipeline = pipeline(
            model=llm,
            tokenizer=self.llmtokenizer,
            task="text-generation",
            do_sample=True,
            temperature=0.7,
            repetition_penalty=1.1,
            return_full_text=False,
            streamer=self.streamer,
            max_new_tokens=500,
        )

    def queryLLM(self, query):
        #resp = self.llmpipeline(chat)  # TODO: fix this
        return self.llmpipeline(query)[0]["generated_text"]

    def initializeRerankingModel(self):
        global rerankingModel
        rerankingModel = RAGPretrainedModel.from_pretrained(self.rerankModelPath)

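    # ragPrompt: full RAG round trip. It formats the system/user chat template,
    # retrieves (and optionally reranks) context chunks, extracts any
    # "Filename:...;" markers for the image gallery, then starts generation in
    # a separate thread and returns the streamer for token-by-token output.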
    @spaces.GPU
    def ragPrompt(self, query, rerankingStep, history):
        prompt_in_chat_format = [
            {
                "role": "system",
                "content": """You are a helpful chatbot for the BSI IT-Grundschutz. Using the information contained in the context,
give a comprehensive answer to the question.
Respond only to the question asked; the response should be concise and relevant but also give some context to the question.
Provide the source document when it is relevant for the understanding.
If the answer cannot be deduced from the context, do not give an answer.""",
            },
            {
                "role": "user",
                "content": """Context:
{context}
---
Chat-History:
{history}
---
Now here is the question you need to answer.

Question: {question}""",
            },
        ]
        RAG_PROMPT_TEMPLATE = self.llmtokenizer.apply_chat_template(
            prompt_in_chat_format, tokenize=False, add_generation_prompt=True
        )
        retrieved_chunks = self.retrieveSimiliarEmbedding(query)
        retrieved_chunks_text = []
        # TODO: something is still wrong with the lists here
        for chunk in retrieved_chunks:
            # TODO: come up with something smarter for all header levels
            if "Header 1" in chunk.metadata.keys():
                retrieved_chunks_text.append(f"The Document is: '{chunk.metadata['source']}'\nHeader of the Section is: '{chunk.metadata['Header 1']}' and Content of it:{chunk.page_content}")
            else:
                retrieved_chunks_text.append(
                    f"The Document is: '{chunk.metadata['source']}'\nImage Description is: {chunk.page_content}")
        for i, chunk in enumerate(retrieved_chunks_text, start=1):
            print(f"Retrieved Chunk number {i}:\n{chunk}")

        if rerankingStep:
            if rerankingModel is None:
                print("Initializing reranker model...")
                self.initializeRerankingModel()
            print("Starting Reranking Chunks...")
            retrieved_chunks_text = rerankingModel.rerank(query, retrieved_chunks_text, k=5)
            retrieved_chunks_text = [chunk["content"] for chunk in retrieved_chunks_text]

            for i, chunk in enumerate(retrieved_chunks_text, start=1):
                print(f"Reranked Chunk number {i}:\n{chunk}")

        context = "\nExtracted documents:\n"
        context += "".join(retrieved_chunks_text)
        # Everything except the latest user message
        final_prompt = RAG_PROMPT_TEMPLATE.format(
            question=query, context=context, history=history[:-1]
        )

        print(f"Query:\n{final_prompt}")
        pattern = r"Filename:(.*?);"
        match = re.findall(pattern, final_prompt)
        self.images = match

        #queryModel = HuggingFacePipeline(pipeline=self.llmpipeline)
        generation_thread = threading.Thread(target=self.llmpipeline, args=(final_prompt,))
        generation_thread.start()

        return self.streamer

        #answer = self.queryLLM(final_prompt)
        #answer = self.llmpipeline(final_prompt)
        #for token in answer:
        #    print(token["generated_text"])
        #    yield token["generated_text"]
        # gen = queryModel.stream(final_prompt)

        #return gen

        #print(f"Answer:\n{answer}")

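    # returnImages: maps the filenames captured from the prompt onto paths
    # inside `docs` so Gradio can display them in the gallery.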
    def returnImages(self):
        imageList = []
        for image in self.images:
            imageList.append(os.path.join(self.docs, image))
        return imageList

    def launchGr(self):
        gr.Interface.from_pipeline(self.llmpipeline).launch()

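# Startup: build/load the index, optionally load the reranker, load the LLM,
# then serve the Gradio Blocks UI (chat column plus image gallery).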
if __name__ == '__main__':
    #RAW_KNOWLEDGE_BASE = []
    #RAW_KNOWLEDGE_BASE.append(LangchainDocument(page_content="1Text", metadata={"source": "bb"}))
    #RAW_KNOWLEDGE_BASE.append(LangchainDocument(page_content="2Text", metadata={"source": "aa"}))
    #RAW_KNOWLEDGE_BASE[0].metadata.update({"NeuerKey":"White"})
    #print(RAW_KNOWLEDGE_BASE)
    #time.sleep(10)

    #{doc.page_content} [{doc.metadata}] => embed the current header into every chunk; retrieve doc.metadata

    renewEmbeddings = False
    reranking = True
    bot = BSIChatbot()
    bot.initializeEmbeddingModel(renewEmbeddings)
    if reranking:
        bot.initializeRerankingModel()
    #TODO: DEBUG:
    #bot.retrieveSimiliarEmbedding("Was ist der IT-Grundschutz?")
    #TODO: DEBUG:
    #time.sleep(10)
    bot.initializeLLM()
    #bot.retrieveSimiliarEmbedding("Welche Typen von Anforderungen gibt es im IT-Grundschutz?")

    #bot.queryLLM("Welche Typen von Anforderungen gibt es im IT-Grundschutz?")

    #bot.ragPrompt("""
    #Welche Informationen beinhaltet die IT-Grundschutz-Methodik (BSI-Standard 200-2)? Wähle aus den folgenden Antwortmöglichkeiten (mehrere können richtig sein!):
    #A: besonders schutzwürdigen Komponenten,
    #B: methodische Hilfestellungen zur schrittweisen Einführung eines ISMS,
    #C: wie die Informationssicherheit im laufenden Betrieb aufrechterhalten und kontinuierlich verbessert werden kann,
    #D: effiziente Verfahren, um die allgemeinen Anforderungen des BSI-Standards 200-1 zu konkretisieren
    #""", True)

    #bot.launchGr()

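    # UI layout: a chat column (history, textbox, Clear/Reset buttons) next to
    # an image gallery that shows the sources referenced by the answer.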
    with gr.Blocks() as demo:
        with gr.Row() as row:
            with gr.Column(scale=3):
                chatbot = gr.Chatbot(type="messages")
                msg = gr.Textbox()
                clear = gr.Button("Clear")
                reset = gr.Button("Reset")
            with gr.Column(scale=1):  # Image gallery
                gallery = gr.Gallery(label="Bildergalerie", elem_id="gallery")

        def user(user_message, history: list):
            return "", history + [{"role": "user", "content": user_message}]


        def returnImages():
            # Fetch the image paths to display in the gallery
            image_paths = bot.returnImages()
            print(f"returning images: {image_paths}")
            return image_paths

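        # gradiobot: streams the assistant answer into the chat history. Tokens
        # are lightly post-processed (drop <|eot_id|>, put list bullets and
        # numbered items on new lines) and yielded together with the gallery.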
        def gradiobot(history: list):
            start = time.time()
            print(f"ragQuery hist -1:{history[-1].get('content')}")
            print(f"ragQuery hist 0:{history[0].get('content')}")
            print(f"fullHistory: {history}")
            bot_response = bot.ragPrompt(history[-1].get('content'), reranking, history)
            history.append({"role": "assistant", "content": ""})

            image_gallery = returnImages()

            for token in bot_response:
                if "eot_id" in token:
                    token = token.replace("<|eot_id|>", "")
                if token.startswith("-"):
                    token = f"\n{token}"
                if re.match(r"^[1-9]\.", token):
                    token = f"\n{token}"

                history[-1]['content'] += token
                yield history, image_gallery
            end = time.time()
            print("End2End Query took", end - start, "seconds!")

        def resetHistory():
            return []

        msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
            gradiobot, inputs=[chatbot], outputs=[chatbot, gallery]
        )

        clear.click(lambda: None, None, chatbot, queue=False)
        reset.click(resetHistory, outputs=chatbot, queue=False)

    demo.css = """
    #gallery {
        display: grid;
        grid-template-columns: repeat(2, 1fr);
        gap: 10px;
        height: 400px;
        overflow: auto;
    }
    """
    demo.launch(allowed_paths=["/home/user/app/docs"])

#Answer: B, C and D => Correct!

# See PyCharm help at https://www.jetbrains.com/help/pycharm/