MikeMann committed
Commit 6f26ba4 · 1 Parent(s): 48efdec

Added RemoteRag with QWEN

Files changed (1)
  1. app.py +94 -6
app.py CHANGED
@@ -18,6 +18,7 @@ import gradio as gr
import threading
import re

+ from openai import OpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.docstore import InMemoryDocstore
from langchain_community.document_loaders import TextLoader
@@ -50,6 +51,7 @@ from huggingface_hub import login

login(token=HF_KEY)

+ SAIA_KEY = SAIA_KEY
vectorstore=None
rerankingModel=None

@@ -67,6 +69,13 @@ class BSIChatbot:
# 'rerank_model_path': 'domci/ColBERTv2-mmarco-de-0.1'
# }

+ llm_base_url = "https://chat-ai.academiccloud.de/v1"
+ llm_remote_model = "qwen2.5-72b-instruct"
+ llm_client = OpenAI(
+ api_key = SAIA_KEY,
+ base_url = llm_base_url
+ )
+
llm_path = "meta-llama/Llama-3.2-3B-Instruct"
word_and_embed_model_path = "intfloat/multilingual-e5-large-instruct"
docs = "/home/user/app/docs"
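The new class attributes wire an OpenAI-compatible client to the Academic Cloud (SAIA) endpoint. As written, SAIA_KEY = SAIA_KEY only resolves if the name is already defined at that point, presumably injected the same way as HF_KEY. A minimal sketch of the more common pattern, assuming the key is exposed as an environment variable or Space secret called SAIA_KEY (the variable name and loading step are illustrative, not taken from this commit):

    import os
    from openai import OpenAI

    # Assumption: the SAIA key is supplied via the environment (e.g. a Space secret).
    SAIA_KEY = os.environ["SAIA_KEY"]

    # OpenAI-compatible client for the Academic Cloud chat endpoint.
    llm_client = OpenAI(
        api_key=SAIA_KEY,
        base_url="https://chat-ai.academiccloud.de/v1",
    )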
@@ -284,7 +293,7 @@ class BSIChatbot:
rerankingModel = RAGPretrainedModel.from_pretrained(self.rerankModelPath)

#@spaces.GPU
- def ragPrompt(self, query, rerankingStep, history):
+ def ragPromptRemote(self, query, rerankingStep, history):
global rerankingModel
prompt_in_chat_format = [
{
@@ -352,11 +361,16 @@ class BSIChatbot:
match = re.findall(pattern, final_prompt)
self.images=match

- #queryModel = HuggingFacePipeline(pipeline = self.llmpipeline)
- generation_thread = threading.Thread(target=self.llmpipeline, args=(final_prompt,))
- generation_thread.start()
+ stream = llm_client.chat.completions.create(
+ messages=final_prompt,
+ model=llm_remote_model,
+ stream=True
+ )
+ return stream
+ #generation_thread = threading.Thread(target=self.llmpipeline, args=(final_prompt,))
+ #generation_thread.start()

- return self.streamer
+ #return self.streamer

#answer=self.queryLLM(final_prompt)
#answer = self.llmpipeline(final_prompt)
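With this change ragPromptRemote returns the raw completion stream instead of the local TextIteratorStreamer. One caveat: chat.completions.create expects messages as a list of role/content dicts, while final_prompt is a single rendered string, so the prompt presumably has to be wrapped as one user message. A hedged sketch of that conventional form (not the committed code):

    # Wrap the rendered RAG prompt as a single user message for the Chat Completions API.
    stream = llm_client.chat.completions.create(
        model=llm_remote_model,
        messages=[{"role": "user", "content": final_prompt}],
        stream=True,
    )
    return stream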
@@ -370,6 +384,80 @@ class BSIChatbot:

#print (f"Answer:\n{answer}")

+ def ragPrompt(self, query, rerankingStep, history):
+ global rerankingModel
+ prompt_in_chat_format = [
+ {
+ "role": "system",
+ "content": """You are a helpful Chatbot for the BSI IT-Grundschutz. Using the information contained in the context,
+ give a comprehensive answer to the question.
+ Respond only to the question asked, response should be concise and relevant but also give some context to the question.
+ Provide the source document when relevant for the understanding.
+ If the answer cannot be deduced from the context, do not give an answer.""",
+ },
+ {
+ "role": "user",
+ "content": """Context:
+ {context}
+ ---
+ Chat-History:
+ {history}
+ ---
+ Now here is the question you need to answer.
+
+ Question: {question}""",
+ },
+ ]
+ RAG_PROMPT_TEMPLATE = self.llmtokenizer.apply_chat_template(
+ prompt_in_chat_format, tokenize=False, add_generation_prompt=True
+ )
+ retrieved_chunks = self.retrieveSimiliarEmbedding(query)
+ retrieved_chunks_text = []
+ #TODO Something is still not right with the lists here
+ for chunk in retrieved_chunks:
+ #TODO Think of something smarter here for all headers
+ if "Header 1" in chunk.metadata.keys():
+ retrieved_chunks_text.append(f"The Document is: '{chunk.metadata['source']}'\nHeader of the Section is: '{chunk.metadata['Header 1']}' and Content of it:{chunk.page_content}")
+ else:
+ retrieved_chunks_text.append(
+ f"The Document is: '{chunk.metadata['source']}'\nImage Description is: ':{chunk.page_content}")
+ i=1
+ for chunk in retrieved_chunks_text:
+ print(f"Retrieved Chunk number {i}:\n{chunk}")
+ i=i+1
+
+ if rerankingStep==True:
+ if rerankingModel == None:
+ print ("initializing Reranker-Model..")
+ self.initializeRerankingModel()
+ print("Starting Reranking Chunks...")
+ rerankingModel
+ retrieved_chunks_text=rerankingModel.rerank(query, retrieved_chunks_text,k=5)
+ retrieved_chunks_text=[chunk["content"] for chunk in retrieved_chunks_text]
+
+ i = 1
+ for chunk in retrieved_chunks_text:
+ print(f"Reranked Chunk number {i}:\n{chunk}")
+ i = i + 1
+
+ context = "\nExtracted documents:\n"
+ context += "".join([doc for i, doc in enumerate(retrieved_chunks_text)])
+ #Everything except the last user request
+ final_prompt = RAG_PROMPT_TEMPLATE.format(
+ question=query, context=context, history=history[:-1]
+ )
+
+ print(f"Query:\n{final_prompt}")
+ pattern = r"Filename:(.*?);"
+ match = re.findall(pattern, final_prompt)
+ self.images=match
+
+ #queryModel = HuggingFacePipeline(pipeline = self.llmpipeline)
+ generation_thread = threading.Thread(target=self.llmpipeline, args=(final_prompt,))
+ generation_thread.start()
+
+ return self.streamer
+
def returnImages(self):
imageList = []
for image in self.images:
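The re-added ragPrompt keeps the local generation path: the rendered prompt is pushed through self.llmpipeline on a background thread and the method returns self.streamer for the caller to iterate. That matches the usual transformers streaming pattern; a rough, self-contained sketch of it follows (model, tokenizer and max_new_tokens are placeholders, not the app's actual attributes):

    from threading import Thread
    from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

    tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-3B-Instruct")
    model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-3B-Instruct")

    # The streamer yields decoded text pieces while generation runs in the background.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    inputs = tokenizer(final_prompt, return_tensors="pt")
    Thread(target=model.generate, kwargs={**inputs, "streamer": streamer, "max_new_tokens": 512}).start()

    for new_text in streamer:
        print(new_text, end="", flush=True)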
@@ -441,7 +529,7 @@ if __name__ == '__main__':
print(f"ragQuery hist -1:{history[-1].get('content')}")
print(f"ragQuery hist 0:{history[0].get('content')}")
print(f"fullHistory: {history}" )
- bot_response = bot.ragPrompt(history[-1].get('content'), reranking, history)
+ bot_response = bot.ragPromptRemote(history[-1].get('content'), reranking, history)
history.append({"role": "assistant", "content": ""})

image_gallery = returnImages()
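At the call site the handler now receives either an OpenAI stream (ragPromptRemote) or a TextIteratorStreamer (ragPrompt) as bot_response and immediately appends an empty assistant turn. A hedged sketch of how the remote stream would typically be drained into that last history entry for the Gradio chatbot (this loop is an assumption, it is not part of the diff):

    # Incrementally fill the empty assistant message from the streamed deltas.
    for chunk in bot_response:
        delta = chunk.choices[0].delta.content
        if delta:
            history[-1]["content"] += delta
            yield history  # let the Gradio Chatbot re-render after each piece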
 