Added RemoteRag with QWEN
app.py
CHANGED
@@ -18,6 +18,7 @@ import gradio as gr
 import threading
 import re
 
+from openai import OpenAI
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_community.docstore import InMemoryDocstore
 from langchain_community.document_loaders import TextLoader

@@ -50,6 +51,7 @@ from huggingface_hub import login
 
 login(token=HF_KEY)
 
+SAIA_KEY = SAIA_KEY
 vectorstore=None
 rerankingModel=None
 

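As committed, SAIA_KEY = SAIA_KEY only re-binds a name that must already be defined earlier in app.py; presumably the key is read from a Space secret in the same way as HF_KEY. A minimal sketch of that assumption (not part of the commit):

import os

# Assumption: the Academic Cloud (SAIA) API key is provided as a Space secret.
SAIA_KEY = os.environ.get("SAIA_KEY")
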
@@ -67,6 +69,13 @@ class BSIChatbot:
     # 'rerank_model_path': 'domci/ColBERTv2-mmarco-de-0.1'
     # }
 
+    llm_base_url = "https://chat-ai.academiccloud.de/v1"
+    llm_remote_model = "qwen2.5-72b-instruct"
+    llm_client = OpenAI(
+        api_key = SAIA_KEY,
+        base_url = llm_base_url
+    )
+
     llm_path = "meta-llama/Llama-3.2-3B-Instruct"
     word_and_embed_model_path = "intfloat/multilingual-e5-large-instruct"
     docs = "/home/user/app/docs"

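The new attributes point an OpenAI client at Academic Cloud's OpenAI-compatible endpoint. A minimal, self-contained sketch of exercising the remote model (not part of the commit; same base URL and model name as above, SAIA_KEY assumed to hold a valid key):

from openai import OpenAI

client = OpenAI(api_key=SAIA_KEY, base_url="https://chat-ai.academiccloud.de/v1")

response = client.chat.completions.create(
    model="qwen2.5-72b-instruct",
    messages=[{"role": "user", "content": "What is the BSI IT-Grundschutz?"}],
)
print(response.choices[0].message.content)
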
@@ -284,7 +293,7 @@ class BSIChatbot:
         rerankingModel = RAGPretrainedModel.from_pretrained(self.rerankModelPath)
 
     #@spaces.GPU
-    def ragPrompt(self, query, rerankingStep, history):
+    def ragPromptRemote(self, query, rerankingStep, history):
         global rerankingModel
         prompt_in_chat_format = [
             {

@@ -352,11 +361,16 @@ class BSIChatbot:
         match = re.findall(pattern, final_prompt)
         self.images=match
 
-        #queryModel = HuggingFacePipeline(pipeline = self.llmpipeline)
-        generation_thread = threading.Thread(target=self.llmpipeline, args=(final_prompt,))
-        generation_thread.start()
+        stream = llm_client.chat.completions.create(
+            messages=final_prompt,
+            model=llm_remote_model,
+            stream=True
+        )
+        return stream
+        #generation_thread = threading.Thread(target=self.llmpipeline, args=(final_prompt,))
+        #generation_thread.start()
 
-        return self.streamer
+        #return self.streamer
 
         #answer=self.queryLLM(final_prompt)
         #answer = self.llmpipeline(final_prompt)

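With this change, ragPromptRemote returns the OpenAI SDK's streaming iterator instead of self.streamer. Note that chat.completions.create expects messages as a list of role/content dicts, so the rendered final_prompt would normally be wrapped, e.g. [{"role": "user", "content": final_prompt}]. A minimal caller-side sketch of consuming the returned stream (not part of the commit; names assumed):

# `bot` is a BSIChatbot instance; each chunk carries an incremental delta.
stream = bot.ragPromptRemote("What is an IT-Grundschutz module?", True, [])
answer = ""
for chunk in stream:
    delta = chunk.choices[0].delta.content
    if delta:  # the final chunk carries no content
        answer += delta
        print(delta, end="", flush=True)
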
@@ -370,6 +384,80 @@ class BSIChatbot:
 
         #print (f"Answer:\n{answer}")
 
+    def ragPrompt(self, query, rerankingStep, history):
+        global rerankingModel
+        prompt_in_chat_format = [
+            {
+                "role": "system",
+                "content": """You are a helpful Chatbot for the BSI IT-Grundschutz. Using the information contained in the context,
+give a comprehensive answer to the question.
+Respond only to the question asked, response should be concise and relevant but also give some context to the question.
+Provide the source document when relevant for the understanding.
+If the answer cannot be deduced from the context, do not give an answer.""",
+            },
+            {
+                "role": "user",
+                "content": """Context:
+{context}
+---
+Chat-History:
+{history}
+---
+Now here is the question you need to answer.
+
+Question: {question}""",
+            },
+        ]
+        RAG_PROMPT_TEMPLATE = self.llmtokenizer.apply_chat_template(
+            prompt_in_chat_format, tokenize=False, add_generation_prompt=True
+        )
+        retrieved_chunks = self.retrieveSimiliarEmbedding(query)
+        retrieved_chunks_text = []
+        #TODO Something is still wrong with the lists here
+        for chunk in retrieved_chunks:
+            #TODO Think of something smarter here for all headers
+            if "Header 1" in chunk.metadata.keys():
+                retrieved_chunks_text.append(f"The Document is: '{chunk.metadata['source']}'\nHeader of the Section is: '{chunk.metadata['Header 1']}' and Content of it:{chunk.page_content}")
+            else:
+                retrieved_chunks_text.append(
+                    f"The Document is: '{chunk.metadata['source']}'\nImage Description is: '{chunk.page_content}'")
+        i=1
+        for chunk in retrieved_chunks_text:
+            print(f"Retrieved Chunk number {i}:\n{chunk}")
+            i=i+1
+
+        if rerankingStep==True:
+            if rerankingModel == None:
+                print ("initializing Reranker-Model..")
+                self.initializeRerankingModel()
+            print("Starting Reranking Chunks...")
+            rerankingModel
+            retrieved_chunks_text=rerankingModel.rerank(query, retrieved_chunks_text,k=5)
+            retrieved_chunks_text=[chunk["content"] for chunk in retrieved_chunks_text]
+
+            i = 1
+            for chunk in retrieved_chunks_text:
+                print(f"Reranked Chunk number {i}:\n{chunk}")
+                i = i + 1
+
+        context = "\nExtracted documents:\n"
+        context += "".join([doc for i, doc in enumerate(retrieved_chunks_text)])
+        #Everything except the last user message
+        final_prompt = RAG_PROMPT_TEMPLATE.format(
+            question=query, context=context, history=history[:-1]
+        )
+
+        print(f"Query:\n{final_prompt}")
+        pattern = r"Filename:(.*?);"
+        match = re.findall(pattern, final_prompt)
+        self.images=match
+
+        #queryModel = HuggingFacePipeline(pipeline = self.llmpipeline)
+        generation_thread = threading.Thread(target=self.llmpipeline, args=(final_prompt,))
+        generation_thread.start()
+
+        return self.streamer
+
     def returnImages(self):
         imageList = []
         for image in self.images:

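The re-added local ragPrompt keeps the previous pattern: generation runs on a background thread and the method returns self.streamer. That matches the transformers TextIteratorStreamer idiom; a self-contained sketch of that pattern (not part of the commit, assuming llmpipeline and streamer are wired up along these lines elsewhere in app.py):

import threading
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

model_name = "meta-llama/Llama-3.2-3B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

inputs = tokenizer("Question: What is IT-Grundschutz?", return_tensors="pt")

# generate() blocks, so it runs on a worker thread while the caller iterates the streamer.
thread = threading.Thread(
    target=model.generate,
    kwargs={**inputs, "streamer": streamer, "max_new_tokens": 256},
)
thread.start()
for text in streamer:
    print(text, end="", flush=True)
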
@@ -441,7 +529,7 @@ if __name__ == '__main__':
         print(f"ragQuery hist -1:{history[-1].get('content')}")
         print(f"ragQuery hist 0:{history[0].get('content')}")
         print(f"fullHistory: {history}" )
-        bot_response = bot.ragPrompt(history[-1].get('content'), reranking, history)
+        bot_response = bot.ragPromptRemote(history[-1].get('content'), reranking, history)
         history.append({"role": "assistant", "content": ""})
 
         image_gallery = returnImages()
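Downstream of this call (not shown in the diff), the empty assistant message appended to history is presumably filled by iterating bot_response. A sketch of such a loop for the remote case, with the helper name and yield signature assumed:

def stream_into_history(bot_response, history, image_gallery):
    # Assumption: OpenAI-style stream chunks; yields the growing history for Gradio to render.
    for chunk in bot_response:
        delta = chunk.choices[0].delta.content
        if delta:
            history[-1]["content"] += delta
            yield history, image_gallery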