MikeMann committed on
Commit
71626e2
·
1 Parent(s): 37fd02c
Files changed (1)
  1. app.py +421 -156
app.py CHANGED
@@ -1,131 +1,267 @@
1
  import os
2
- os.environ["CUDA_VISIBLE_DEVICES"] = "" # Disable CUDA initialization
3
  os.environ["allow_dangerous_deserialization"] = "True"
4
  print(os.getcwd())
5
  embedding_path="/home/user/app/docs/_embeddings/index.faiss"
6
  print(f"Loading FAISS index from: {embedding_path}")
7
  if not os.path.exists(embedding_path):
8
  print("File not found!")
9
-
10
  HF_KEY=os.getenv('Gated_Repo')
11
 
12
  import spaces
13
- import sys
14
  import time
15
- import re
16
- import threading
17
- from typing import List, Dict
18
 
19
  import torch
20
  import gradio as gr
21
 
22
- import pickle
23
  from langchain_community.docstore import InMemoryDocstore
24
  from langchain_community.document_loaders import TextLoader
 
25
  from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
 
26
  from langchain_community.vectorstores import FAISS
 
27
  from langchain_huggingface import HuggingFaceEmbeddings
28
- from langchain.docstore.document import Document as LangchainDocument
29
- from langchain.text_splitter import RecursiveCharacterTextSplitter
30
  from langchain_community.vectorstores.utils import DistanceStrategy
31
- from sentence_transformers import SentenceTransformer
32
- from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TextIteratorStreamer, pipeline, TextStreamer
33
  from huggingface_hub import login
34
 
35
  login(token=HF_KEY)
36
 
37
  vectorstore=None
38
 
39
- class CustomTextStreamer(TextStreamer):
40
- def __init__(self, tokenizer):
41
- super().__init__(tokenizer)
42
-
43
- def put(self, token_ids):
44
- text = self.tokenizer.decode(token_ids, skip_special_tokens=True)
45
- print(text, end='', flush=True) # stream directly to the console
46
- yield text # yield for Gradio or similar frameworks
47
-
48
  class BSIChatbot:
49
- def __init__(self, model_paths: Dict[str, str], docs_path: str):
50
  self.embedding_model = None
51
- self.llmpipeline = None
52
- self.llmtokenizer = None
53
- #self.vectorstore = None
54
- self.reranking_model = None
55
- self.streamer = None
56
- self.images = [None]
57
-
58
- self.llm_path = model_paths['llm_path']
59
- self.word_and_embed_model_path = model_paths['embed_model_path']
60
- self.docs = docs_path
61
- self.rerank_model_path = model_paths['rerank_model_path']
62
-
63
- #@spaces.GPU
64
- def initialize_embedding_model(self, rebuild_embeddings: bool):
65
- raw_knowledge_base = []
66
  global vectorstore
67
- # Initialize embedding model
68
  self.embedding_model = HuggingFaceEmbeddings(
69
  model_name=self.word_and_embed_model_path,
70
  multi_process=True,
71
  model_kwargs={"device": "cuda"},
72
- encode_kwargs={"normalize_embeddings": True},
73
  )
74
 
75
- if rebuild_embeddings:
76
- # Load documents
77
- for doc in os.listdir(self.docs):
78
- file_path = os.path.join(self.docs, doc)
79
- if doc.endswith(".md") or doc.endswith(".txt"):
80
- with open(file_path, 'r', encoding='utf-8' if doc.endswith(".md") else 'cp1252') as file:
81
- content = file.read()
82
- metadata = {"source": doc}
83
- raw_knowledge_base.append(LangchainDocument(page_content=content, metadata=metadata))
84
-
85
- # Split documents into chunks
86
  tokenizer = AutoTokenizer.from_pretrained(self.word_and_embed_model_path)
 
87
  text_splitter = RecursiveCharacterTextSplitter.from_huggingface_tokenizer(
88
  tokenizer=tokenizer,
89
- chunk_size=512,
90
- chunk_overlap=0,
91
- add_start_index=True,
92
- strip_whitespace=True,
93
  )
94
 
95
- processed_docs = []
96
- for doc in raw_knowledge_base:
97
- chunks = text_splitter.split_documents([doc])
98
- for chunk in chunks:
99
  chunk.metadata.update({"source": doc.metadata['source']})
100
- processed_docs.extend(chunks)
101
-
102
- # Create and save vector store
103
- vectorstore = FAISS.from_documents(processed_docs, self.embedding_model, distance_strategy=DistanceStrategy.COSINE)
104
- vectorstore.save_local(os.path.join(self.docs, "_embeddings"))
105
  else:
106
- # Load existing vector store
107
- vectorstore = FAISS.load_local(os.path.join(self.docs, "_embeddings"), self.embedding_model, allow_dangerous_deserialization=True)
108
- print("DBG: Vectorstore Status Initialization:", vectorstore)
 
109
 
110
- #@spaces.GPU
111
- def retrieve_similar_embedding(self, query: str):
 
112
  global vectorstore
113
- #lazy load
114
- #if (self.vectorstore == None):
115
- # self.vectorstore = FAISS.load_local(os.path.join(self.docs, "_embeddings"), self.embedding_model,
116
- # allow_dangerous_deserialization=True)
117
- print("DBG: Vectorstore Status retriever:", vectorstore)
118
  query = f"Instruct: Given a search query, retrieve the relevant passages that answer the query\nQuery:{query}"
119
- return vectorstore.similarity_search(query=query, k=20)
120
 
121
- #@spaces.GPU
122
- def initialize_llm(self):
123
- bnb_config = BitsAndBytesConfig(load_in_8bit=True)
124
- llm = AutoModelForCausalLM.from_pretrained(self.llm_path, quantization_config=bnb_config)
125
- #llm = AutoModelForCausalLM.from_pretrained(self.llm_path)
126
  self.llmtokenizer = AutoTokenizer.from_pretrained(self.llm_path)
127
- #self.streamer = TextIteratorStreamer(self.llmtokenizer, skip_prompt=True)
128
- self.streamer = CustomTextStreamer(self.llmtokenizer)
129
  self.llmpipeline = pipeline(
130
  model=llm,
131
  tokenizer=self.llmtokenizer,
@@ -134,84 +270,213 @@ class BSIChatbot:
134
  temperature=0.7,
135
  repetition_penalty=1.1,
136
  return_full_text=False,
137
- #streamer=self.streamer,
138
  max_new_tokens=500,
139
  )
140
 
141
- @spaces.GPU
142
- def rag_prompt(self, query: str, rerank: bool, history: List[Dict]):
143
- retrieved_chunks = self.retrieve_similar_embedding(query)
144
- retrieved_texts = [f"{chunk.metadata['source']}:\n{chunk.page_content}" for chunk in retrieved_chunks]
145
-
146
- context = "\n".join(retrieved_texts)
147
- history_text = "\n".join([h['content'] for h in history])
148
- final_prompt = f"""Context:
149
- {context}
150
- ---
151
- History:
152
- {history_text}
153
- ---
154
- Question: {query}"""
155
-
156
- input_ids = self.llmtokenizer(final_prompt, return_tensors="pt").to(self.llmpipeline.model.device)
157
- streamer=self.streamer
158
-
159
- self.llmpipeline.model.generate(input_ids=input_ids, streamer=streamer, max_new_tokens=500, temperature=0.7, repetition_penalty=1.1)
160
-
161
- for text in streamer:
162
- yield text
163
-
164
- #input_ids = self.llmtokenizer(final_prompt, return_tensors="pt").to(self.llmpipeline.model.device)
165
- #streamer = TextIteratorStreamer(self.llmtokenizer, skip_prompt=True)
166
- #generate_kwargs = {
167
- # "input_ids": input_ids,
168
- # "streamer": streamer,
169
- # "max_new_tokens": 500,
170
- # "temperature": 0.7,
171
- # "repetition_penalty": 1.1,
172
- # "do_sample": True,
173
- #}
174
-
175
- #generation_thread = threading.Thread(target=self.llmpipeline.model.generate, kwargs=generate_kwargs)
176
- #generation_thread.start()
177
-
178
- #for text in streamer:
179
- #yield text
180
-
181
- def launch_interface(self):
182
- with gr.Blocks() as demo:
183
- chatbot = gr.Chatbot(type="messages")
184
- msg = gr.Textbox()
185
- clear = gr.Button("Clear")
186
- reset = gr.Button("Reset")
187
-
188
- def user_input(user_message, history):
189
- return "", history + [{"role": "user", "content": user_message}]
190
-
191
- def bot_response(history):
192
- response = self.rag_prompt(history[-1]['content'], True, history)
193
- history.append({"role": "assistant", "content": ""})
194
-
195
- for token in response:
196
- history[-1]['content'] += token
197
- #print("Current Response Token Serializable?", pickle.dumps(token))
198
- yield history
199
-
200
- msg.submit(user_input, [msg, chatbot], [msg, chatbot]).then(bot_response, chatbot, chatbot)
201
- clear.click(lambda: None, None, chatbot)
202
- reset.click(lambda: [], outputs=chatbot)
203
-
204
- demo.launch()
205
 
206
  if __name__ == '__main__':
207
- model_paths = {
208
- 'llm_path': 'meta-llama/Llama-3.2-3B-Instruct',
209
- 'embed_model_path': 'intfloat/multilingual-e5-large-instruct',
210
- 'rerank_model_path': 'domci/ColBERTv2-mmarco-de-0.1'
211
- }
212
- docs_path = '/home/user/app/docs'
213
-
214
- bot = BSIChatbot(model_paths, docs_path)
215
- bot.initialize_embedding_model(rebuild_embeddings=False)
216
- bot.initialize_llm()
217
- bot.launch_interface()
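The rewritten app.py below streams generation through transformers' TextIteratorStreamer consumed from a background thread, rather than the custom TextStreamer subclass shown above. A minimal, self-contained sketch of that pattern (the model id and prompt are placeholders, not taken verbatim from the app):

from threading import Thread
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

model_id = "meta-llama/Llama-3.2-3B-Instruct"  # placeholder: any causal LM works for the pattern
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

inputs = tokenizer("Was ist der IT-Grundschutz?", return_tensors="pt").to(model.device)
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

# generate() blocks, so it runs on a worker thread while the caller iterates the streamer
thread = Thread(target=model.generate, kwargs=dict(**inputs, streamer=streamer, max_new_tokens=500))
thread.start()
for text_chunk in streamer:
    print(text_chunk, end="", flush=True)  # or yield text_chunk into a Gradio chat history
thread.join()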
 
1
  import os
 
2
  os.environ["allow_dangerous_deserialization"] = "True"
3
  print(os.getcwd())
4
  embedding_path="/home/user/app/docs/_embeddings/index.faiss"
5
  print(f"Loading FAISS index from: {embedding_path}")
6
  if not os.path.exists(embedding_path):
7
  print("File not found!")
 
8
  HF_KEY=os.getenv('Gated_Repo')
9
 
10
  import spaces
 
11
  import time
12
+ from typing import final
13
+ import asyncio
 
14
 
15
  import torch
16
  import gradio as gr
17
+ import threading
18
+ import re
19
 
20
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
21
  from langchain_community.docstore import InMemoryDocstore
22
  from langchain_community.document_loaders import TextLoader
23
+ from langchain.docstore.document import Document as LangchainDocument
24
  from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
25
+ from langchain_core.indexing import index
26
+ from langchain_core.vectorstores import VectorStore
27
+ from llama_index.core.node_parser import TextSplitter
28
+ from llama_index.legacy.vector_stores import FaissVectorStore
29
+ from pycparser.ply.yacc import token
30
+ from ragatouille import RAGPretrainedModel
31
+
32
+ from langchain_text_splitters import MarkdownHeaderTextSplitter, CharacterTextSplitter
33
+ from sentence_transformers import SentenceTransformer
34
+ from sqlalchemy.testing.suite.test_reflection import metadata
35
+ from sympy.solvers.diophantine.diophantine import length
36
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TextIteratorStreamer
37
+ from transformers import pipeline
38
+
39
+ #DEPR:from langchain.vectorstores import FAISS
40
+ import faiss
41
  from langchain_community.vectorstores import FAISS
42
+ #DEPR: from langchain_community.embeddings import HuggingFaceEmbeddings
43
  from langchain_huggingface import HuggingFaceEmbeddings
44
  from langchain_community.vectorstores.utils import DistanceStrategy
45
  from huggingface_hub import login
46
 
47
+ # Press Shift+F10 to execute it or replace it with your code.
48
+ # Press Double Shift to search everywhere for classes, files, tool windows, actions, and settings.
49
+
50
  login(token=HF_KEY)
51
 
52
  vectorstore=None
53
 
 
54
  class BSIChatbot:
55
+ embedding_model = None
56
+ llmpipeline = None
57
+ llmtokenizer = None
58
+ vectorstore = None
59
+ rerankingModel = None
60
+ streamer = None
61
+ images = [None]
62
+
63
+ # model_paths = {
64
+ # 'llm_path': 'meta-llama/Llama-3.2-3B-Instruct',
65
+ # 'embed_model_path': 'intfloat/multilingual-e5-large-instruct',
66
+ # 'rerank_model_path': 'domci/ColBERTv2-mmarco-de-0.1'
67
+ # }
68
+
69
+ llm_path = "meta-llama/Llama-3.2-3B-Instruct"
70
+ word_and_embed_model_path = "intfloat/multilingual-e5-large-instruct"
71
+ docs = "/home/user/app/docs"
72
+ #docs = "H:\\Uni\\Master\\Masterarbeit\\Masterarbeit\\daten\\_parsed_embed_test"
73
+ rerankModelPath="domci/ColBERTv2-mmarco-de-0.1"
74
+ embedPath="/home/user/app/docs_embeddings"
75
+
76
+ def __init__(self):
77
  self.embedding_model = None
78
+ #self.vectorstore: VectorStore = None
79
+
80
+ def initializeEmbeddingModel(self, new_embedding):
81
  global vectorstore
82
+ RAW_KNOWLEDGE_BASE = []
83
+
84
+ #Embedding, Vector generation and storing:
85
  self.embedding_model = HuggingFaceEmbeddings(
86
  model_name=self.word_and_embed_model_path,
87
  multi_process=True,
88
  model_kwargs={"device": "cuda"},
89
+ encode_kwargs={"normalize_embeddings": True}, # Set `True` for cosine similarity
90
  )
91
 
92
+ #index_cpu = faiss.IndexFlatL2(1024)
93
+ #res = faiss.StandardGpuResources()
94
+ #index_gpu = faiss.index_cpu_to_gpu(res, 0, index_cpu)
95
+ dirList = os.listdir(self.docs)
96
+ if (new_embedding==True):
97
+ for doc in dirList:
98
+ print(doc)
99
+ if (".md" in doc):
100
+ ##doctxt = TextLoader(docs + "\\" + doc).load()
101
+ file = open(self.docs + "\\" + doc, 'r', encoding='utf-8')
102
+ doctxt = file.read()
103
+ RAW_KNOWLEDGE_BASE.append(LangchainDocument(page_content=doctxt, metadata={"source": doc}))
104
+ file.close()
105
+ if (".txt" in doc):
106
+ file = open(self.docs + "\\" + doc, 'r', encoding='cp1252')
107
+ doctxt = file.read()
108
+ if doc.replace(".txt",".png") in dirList:
109
+ RAW_KNOWLEDGE_BASE.append(LangchainDocument(page_content=doctxt, metadata={"source": doc.replace(".txt",".png")}))
110
+ if doc.replace(".txt",".jpg") in dirList:
111
+ RAW_KNOWLEDGE_BASE.append(LangchainDocument(page_content=doctxt, metadata={"source": doc.replace(".txt",".jpg")}))
112
+ file.close()
113
+
114
+ # RAW_KNOWLEDGE_BASE.append(txtLoader)
115
+ # print(RAW_KNOWLEDGE_BASE)
116
+
117
+ # Chunking starts here
118
+
119
+ headers_to_split_on = [
120
+ ("#", "Header 1"),
121
+ ("##", "Header 2"),
122
+ ("###", "Header 3"),
123
+ ("####", "Header 4"),
124
+ ("#####", "Header 5"),
125
+ ]
126
+
127
+ markdown_splitter = MarkdownHeaderTextSplitter(
128
+ headers_to_split_on=headers_to_split_on,
129
+ strip_headers=True
130
+ )
131
+
132
  tokenizer = AutoTokenizer.from_pretrained(self.word_and_embed_model_path)
133
+
134
  text_splitter = RecursiveCharacterTextSplitter.from_huggingface_tokenizer(
135
  tokenizer=tokenizer,
136
+ chunk_size=512, # The maximum number of tokens in a chunk
137
+ chunk_overlap=0, # The number of tokens to overlap between chunks
138
+ add_start_index=True, # If `True`, includes chunk's start index in metadata
139
+ strip_whitespace=True, # If `True`, strips whitespace from the start and end of every document
140
  )
141
 
142
+ ## TODO: figure out what to do with the start index and how to add metadata
143
+ docs_processed = []
144
+ for doc in RAW_KNOWLEDGE_BASE:
145
+ print(f"Word-Length in doc:{len(doc.page_content.split())}")
146
+ doc_cache = markdown_splitter.split_text(doc.page_content)
147
+ # print(f"Word-Length in doc_cache after MarkdownSplitter:{len(doc_cache.split())}")
148
+ doc_cache = text_splitter.split_documents(doc_cache)
149
+ # print(f"Word-Length in doc_cache after text_splitter:{len(doc_cache.split())}")
150
+ for chunk in doc_cache:
151
  chunk.metadata.update({"source": doc.metadata['source']})
152
+ print(f"Chunk_Debug len: {len(chunk.page_content.split())} and Chunk:{chunk}")
153
+ # DEBUG:
154
+ # print(f"doc_cache after Metadata added:{doc_cache}\n")
155
+ docs_processed += doc_cache
156
+
157
+ #final_docs = []
158
+ #for doc in docs_processed:
159
+ # final_docs += text_splitter.split_documents([doc])
160
+
161
+ #docs_processed = final_docs
162
+
163
+ ## Old code from here on:
164
+ # MARKDOWN_SEPARATORS = [
165
+ # "\n\n",
166
+ # "---"
167
+ # "\n",
168
+ # " ",
169
+ # ""
170
+ # ]
171
+
172
+ #text_splitter = RecursiveCharacterTextSplitter(
173
+ # chunk_size=512, # The maximum number of characters in a chunk
174
+ # chunk_overlap=100, # The number of characters to overlap between chunks
175
+ # add_start_index=True, # If `True`, includes chunk's start index in metadata
176
+ # strip_whitespace=True, # If `True`, strips whitespace from the start and end of every document
177
+ # separators=MARKDOWN_SEPARATORS,
178
+ #)
179
+
180
+ #docs_processed = []
181
+ #for doc in RAW_KNOWLEDGE_BASE:
182
+ # docs_processed += text_splitter.split_documents([doc])
183
+
184
+ print(f"Docs processed:{len(docs_processed)}")
185
+ # Max_Sequence_Length of e5 large instr = 512 Tokens
186
+
187
+
188
+ # Make sure the maximum chunk length stays below the embedding model's max sequence length
189
+ lengths = [len(s.page_content) for s in docs_processed]
190
+ print(max(lengths))
191
+
192
+ #for l in docs_processed:
193
+ # print(f"Char-Length:{len(l.page_content.split())}")
194
+ # print(f"Tokenizer Length: {len(tokenizer.tokenize(l.page_content))}")
195
+
196
+ #if (max(lengths) > SentenceTransformer(self.word_and_embed_model_path).max_seq_length):
197
+ # print(
198
+ # f'Error: Fit chunking size into embedding model.. Chunk{max(lengths)} is bigger than {SentenceTransformer(self.word_and_embed_model_path).Max_Sequence_Length}')
199
+
200
+ start = time.time()
201
+ #docstore = InMemoryDocstore({str(i): doc for i, doc in enumerate(docs_processed)})
202
+ #index_to_docstore_id = {i: str(i) for i in range(len(docs_processed))}
203
+ vectorstore = FAISS.from_documents(docs_processed, self.embedding_model, distance_strategy=DistanceStrategy.COSINE)
204
+ #self.vectorstore = FAISS(
205
+ # embedding_function=self.embedding_model,
206
+ # index=index_gpu,
207
+ # distance_strategy=DistanceStrategy.COSINE,
208
+ # docstore=docstore,
209
+ # index_to_docstore_id=index_to_docstore_id
210
+ #)
211
+ #self.vectorstore.from_documents(docs_processed, self.embedding_model)
212
+ #index_cpu = faiss.index_gpu_to_cpu(self.vectorstore.index)
213
+ #self.vectorstore.index = index_cpu
214
+ vectorstore.save_local(self.embedPath)
215
+ #self.vectorstore.index = index_gpu
216
+ end = time.time()
217
+ print("Saving Embeddings took", end-start, "seconds!")
218
  else:
219
+ start = time.time()
220
+ vectorstore = FAISS.load_local(self.embedPath, self.embedding_model, allow_dangerous_deserialization=True)
221
+ #self.vectorstore.index = index_gpu
222
+ end = time.time()
223
 
224
+ print("Loading Embeddings took", end - start, "seconds!")
225
+
226
+ def retrieveSimiliarEmbedding(self, query):
227
  global vectorstore
228
+ print("Retrieving Embeddings...")
229
+ start = time.time()
230
  query = f"Instruct: Given a search query, retrieve the relevant passages that answer the query\nQuery:{query}"
 
231
 
232
+ #self.vectorstore.
233
+ #retrieved_chunks = self.vectorstore.similarity_search(query=query, k=20)
234
+ retrieved_chunks = vectorstore.similarity_search(query=query, k=20)
235
+ #finalchunks = []
236
+ #for chunk in retrieved_chunks:
237
+ # if "---" not in chunk.page_content:
238
+ # finalchunks.append(chunk)
239
+ #retrieved_chunks = finalchunks
240
+ end = time.time()
241
+ print("Retrieving Chunks with similiar embeddings took", end - start, "seconds!")
242
+ #print("\n==================================Top document==================================")
243
+ #print(retrieved_chunks[0].page_content)
244
+ #print(retrieved_chunks[1].page_content)
245
+ #print(retrieved_chunks[2].page_content)
246
+ #print("==================================Metadata==================================")
247
+ #print(retrieved_chunks[0].metadata)
248
+ #print(retrieved_chunks[1].metadata)
249
+ #print(retrieved_chunks[2].metadata)
250
+ print(f"printing first chunk to see whats inside: {retrieved_chunks[0]}")
251
+ return retrieved_chunks
252
+
253
+ def initializeLLM(self):
254
+ bnb_config = BitsAndBytesConfig(
255
+ load_in_8bit=True,
256
+ #bnb_8bit_use_double_quant=True,
257
+ #bnb_8bit_quant_type="nf4",
258
+ #bnb_8bit_compute_dtype=torch.bfloat16,
259
+ )
260
+ llm = AutoModelForCausalLM.from_pretrained(
261
+ self.llm_path, quantization_config=bnb_config
262
+ )
263
  self.llmtokenizer = AutoTokenizer.from_pretrained(self.llm_path)
264
+ self.streamer=TextIteratorStreamer(self.llmtokenizer, skip_prompt=True)
 
265
  self.llmpipeline = pipeline(
266
  model=llm,
267
  tokenizer=self.llmtokenizer,
 
270
  temperature=0.7,
271
  repetition_penalty=1.1,
272
  return_full_text=False,
273
+ streamer=self.streamer,
274
  max_new_tokens=500,
275
  )
276
 
277
+ def queryLLM(self,query):
278
+ #resp = self.llmpipeline(chat) # TODO: fix this
279
+ return(self.llmpipeline(query)[0]["generated_text"])
280
+
281
+ def initializeRerankingModel(self):
282
+ self.rerankingModel = RAGPretrainedModel.from_pretrained(self.rerankModelPath)
283
+
284
+
285
+ def ragPrompt(self, query, rerankingStep, history):
286
+ prompt_in_chat_format = [
287
+ {
288
+ "role": "system",
289
+ "content": """You are an helpful Chatbot for the BSI IT-Grundschutz. Using the information contained in the context,
290
+ give a comprehensive answer to the question.
291
+ Respond only to the question asked; the response should be concise and relevant, but also give some context to the question.
292
+ Provide the source document when it is relevant for understanding.
293
+ If the answer cannot be deduced from the context, do not give an answer.""",
294
+ },
295
+ {
296
+ "role": "user",
297
+ "content": """Context:
298
+ {context}
299
+ ---
300
+ Chat-History:
301
+ {history}
302
+ ---
303
+ Now here is the question you need to answer.
304
+
305
+ Question: {question}""",
306
+ },
307
+ ]
308
+ RAG_PROMPT_TEMPLATE = self.llmtokenizer.apply_chat_template(
309
+ prompt_in_chat_format, tokenize=False, add_generation_prompt=True
310
+ )
311
+ retrieved_chunks = self.retrieveSimiliarEmbedding(query)
312
+ retrieved_chunks_text = []
313
+ #TODO Something is wrong with the lists here
314
+ for chunk in retrieved_chunks:
315
+ #TODO Think of something smarter here to handle all headers
316
+ if "Header 1" in chunk.metadata.keys():
317
+ retrieved_chunks_text.append(f"The Document is: '{chunk.metadata["source"]}'\nHeader of the Section is: '{chunk.metadata["Header 1"]}' and Content of it:{chunk.page_content}")
318
+ else:
319
+ retrieved_chunks_text.append(
320
+ f"The Document is: '{chunk.metadata["source"]}'\nImage Description is: ':{chunk.page_content}")
321
+ i=1
322
+ for chunk in retrieved_chunks_text:
323
+ print(f"Retrieved Chunk number {i}:\n{chunk}")
324
+ i=i+1
325
+
326
+ if rerankingStep==True:
327
+ if self.rerankingModel == None:
328
+ print ("initializing Reranker-Model..")
329
+ self.initializeRerankingModel()
330
+ print("Starting Reranking Chunks...")
331
+ self.rerankingModel
332
+ retrieved_chunks_text=self.rerankingModel.rerank(query, retrieved_chunks_text,k=5)
333
+ retrieved_chunks_text=[chunk["content"] for chunk in retrieved_chunks_text]
334
+
335
+ i = 1
336
+ for chunk in retrieved_chunks_text:
337
+ print(f"Reranked Chunk number {i}:\n{chunk}")
338
+ i = i + 1
339
+
340
+ context = "\nExtracted documents:\n"
341
+ context += "".join([doc for i, doc in enumerate(retrieved_chunks_text)])
342
+ # Everything except the last user query
343
+ final_prompt = RAG_PROMPT_TEMPLATE.format(
344
+ question=query, context=context, history=history[:-1]
345
+ )
346
+
347
+ print(f"Query:\n{final_prompt}")
348
+ pattern = r"Filename:(.*?);"
349
+ match = re.findall(pattern, final_prompt)
350
+ self.images=match
351
+
352
+ #queryModel = HuggingFacePipeline(pipeline = self.llmpipeline)
353
+ generation_thread = threading.Thread(target=self.llmpipeline, args=(final_prompt,))
354
+ generation_thread.start()
355
+
356
+ return self.streamer
357
+
358
+ #answer=self.queryLLM(final_prompt)
359
+ #answer = self.llmpipeline(final_prompt)
360
+ #for token in answer:
361
+ # print (token["generated_text"])
362
+ # yield token["generated_text"]
363
+ # gen = queryModel.stream(final_prompt)
364
+
365
+
366
+ #return gen
367
+
368
+ #print (f"Answer:\n{answer}")
369
+
370
+ def returnImages(self):
371
+ imageList = []
372
+ for image in self.images:
373
+ imageList.append(f"{self.docs}\\{image}")
374
+ return imageList
375
+
376
+ def launchGr(self):
377
+ gr.Interface.from_pipeline(self.llmpipeline).launch()
378
+
379
+
380
 
381
  if __name__ == '__main__':
382
+ #RAW_KNOWLEDGE_BASE = []
383
+ #RAW_KNOWLEDGE_BASE.append(LangchainDocument(page_content="1Text", metadata={"source": "bb"}))
384
+ #RAW_KNOWLEDGE_BASE.append(LangchainDocument(page_content="2Text", metadata={"source": "aa"}))
385
+ #RAW_KNOWLEDGE_BASE[0].metadata.update({"NeuerKey":"White"})
386
+ #print(RAW_KNOWLEDGE_BASE)
387
+ #time.sleep(10)
388
+
389
+ #{doc.page_content} [{doc.metadata}] => embed the current header into every chunk; retrieve doc.metadata
390
+
391
+ renewEmbeddings = False
392
+ reranking = True
393
+ bot = BSIChatbot()
394
+ bot.initializeEmbeddingModel(renewEmbeddings)
395
+ if reranking == True:
396
+ bot.initializeRerankingModel()
397
+ #TODO: DEBUG:
398
+ #bot.retrieveSimiliarEmbedding("Was ist der IT-Grundschutz?")
399
+ #TODO: DEBUG:
400
+ #time.sleep(10)
401
+ bot.initializeLLM()
402
+ #bot.retrieveSimiliarEmbedding("Welche Typen von Anforderungen gibt es im IT-Grundschutz?")
403
+
404
+ #bot.queryLLM("Welche Typen von Anforderungen gibt es im IT-Grundschutz?")
405
+
406
+ #bot.ragPrompt("""
407
+ #Welche Informationen beinhaltet die IT-Grundschutz-Methodik (BSI-Standard 200-2)? Wähle aus den folgenden Antwortmöglichkeiten (mehrere können richtig sein!):
408
+ #A: besonders schutzwürdigen Komponenten,
409
+ #B: methodische Hilfestellungen zur schrittweisen Einführung eines ISMS,
410
+ #C: wie die Informationssicherheit im laufenden Betrieb aufrechterhalten und kontinuierlich verbessert werden kann,
411
+ #D: effiziente Verfahren, um die allgemeinen Anforderungen des BSI-Standards 200-1 zu konkretisieren
412
+ #""", True)
413
+
414
+ #bot.launchGr()
415
+
416
+ with gr.Blocks() as demo:
417
+ with gr.Row() as row:
418
+ with gr.Column(scale=3):
419
+ chatbot = gr.Chatbot(type="messages")
420
+ msg = gr.Textbox()
421
+ clear = gr.Button("Clear")
422
+ reset = gr.Button("Reset")
423
+ with gr.Column(scale=1): # image gallery
424
+ gallery = gr.Gallery(label="Image gallery", elem_id="gallery")
425
+
426
+ def user(user_message, history: list):
427
+ return "", history + [{"role": "user", "content": user_message}]
428
+
429
+
430
+ def returnImages():
431
+ # Fetch the image paths here and turn them into gr.Image objects
432
+ image_paths = bot.returnImages()
433
+ print(f"returning images: {image_paths}")
434
+ return image_paths
435
+
436
+ def gradiobot(history: list):
437
+ start = time.time()
438
+ print(f"ragQuery hist -1:{history[-1].get("content")}")
439
+ print(f"ragQuery hist 0:{history[0].get("content")}")
440
+ print(f"fullHistory: {history}" )
441
+ bot_response = bot.ragPrompt(history[-1].get("content"), reranking, history)
442
+ history.append({"role": "assistant", "content": ""})
443
+
444
+ image_gallery = returnImages()
445
+
446
+ for token in bot_response:
447
+ if "eot_id" in token:
448
+ token = token.replace("<|eot_id|>","")
449
+ if token.startswith("-"):
450
+ token = f"\n{token}"
451
+ if re.match(r"^[1-9]\.",token):
452
+ token = f"\n{token}"
453
+
454
+ history[-1]['content'] += token
455
+ yield history, image_gallery
456
+ end = time.time()
457
+ print("End2End Query took", end - start, "seconds!")
458
+
459
+ def resetHistory():
460
+ return []
461
+
462
+ msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
463
+ gradiobot, inputs=[chatbot], outputs=[chatbot, gallery]
464
+ )
465
+
466
+
467
+ clear.click(lambda: None, None, chatbot, queue=False)
468
+ reset.click(resetHistory, outputs=chatbot, queue=False)
469
+ demo.css = """
470
+ #gallery {
471
+ display: grid;
472
+ grid-template-columns: repeat(2, 1fr);
473
+ gap: 10px;
474
+ height: 400px;
475
+ overflow: auto;
476
+ }
477
+ """
478
+ demo.launch(allowed_paths=["/home/user/app/docs"])
479
+
480
+ #Answer: B, C and D => correct!
481
+
482
+ # See PyCharm help at https://www.jetbrains.com/help/pycharm/
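For reference, the FAISS round trip inside initializeEmbeddingModel (embed once, persist, reload on later runs) reduces to roughly the following sketch; the path and the single example document are placeholders rather than values from the app:

from langchain.docstore.document import Document
from langchain_community.vectorstores import FAISS
from langchain_community.vectorstores.utils import DistanceStrategy
from langchain_huggingface import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings(
    model_name="intfloat/multilingual-e5-large-instruct",
    encode_kwargs={"normalize_embeddings": True},  # normalized vectors for cosine similarity
)
docs = [Document(page_content="Example chunk", metadata={"source": "example.md"})]

# Build the index from the chunked documents and persist it to disk.
store = FAISS.from_documents(docs, embeddings, distance_strategy=DistanceStrategy.COSINE)
store.save_local("docs/_embeddings")

# Later runs skip re-embedding and just reload the saved index.
store = FAISS.load_local("docs/_embeddings", embeddings, allow_dangerous_deserialization=True)
hits = store.similarity_search("Was ist der IT-Grundschutz?", k=5)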