removing Thread
app.py CHANGED
@@ -1,4 +1,8 @@
 import os
+os.environ["CUDA_VISIBLE_DEVICES"] = ""  # Disable CUDA initialization
+os.environ["allow_dangerous_deserialization"] = "True"
+
+import spaces
 import asyncio
 import sys
 from typing import List, Dict
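For context, `spaces` is the Hugging Face helper package behind ZeroGPU Spaces: code that needs a GPU is wrapped in `@spaces.GPU`, and a device is attached only while the decorated call runs. A minimal, self-contained sketch of that pattern (illustrative only, not this app's code):

import spaces   # Hugging Face ZeroGPU helper package
import torch

@spaces.GPU     # a GPU is attached only for the duration of this call
def matmul_demo() -> float:
    # Any CUDA work belongs inside the decorated function.
    x = torch.randn(1024, 1024, device="cuda")
    return (x @ x).sum().item()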
@@ -17,8 +21,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 from huggingface_hub import login
 
 # Environment setup
-
-os.environ["allow_dangerous_deserialization"] = "True"
+
 
 HF_KEY = os.getenv('Gated_Repo')
 embedding_path = "/home/user/app/docs/_embeddings/index.faiss"
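One caveat worth noting: as far as I know, LangChain's FAISS wrapper only honors the `allow_dangerous_deserialization` keyword argument to `load_local` (which this file already passes further down); it does not read an environment variable of that name, so the `os.environ` line above is likely a no-op for FAISS itself. A sketch of the keyword form, assuming a recent `langchain_community` layout:

from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings()        # defaults to a small sentence-transformers model
store = FAISS.load_local(
    "docs/_embeddings",                     # hypothetical index folder
    embeddings,
    allow_dangerous_deserialization=True,   # opt in to unpickling the stored index
)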
@@ -39,7 +42,7 @@ class BSIChatbot:
         self.llm_path = model_paths['llm_path']
         self.word_and_embed_model_path = model_paths['embed_model_path']
         self.docs = docs_path
-
+    @spaces.GPU
     async def initialize_embedding_model(self, rebuild_embeddings: bool):
         raw_knowledge_base = []
 
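`@spaces.GPU` also accepts a duration hint when the default time budget is too short; a sketch, assuming the documented `spaces.GPU(duration=...)` form (whether it composes cleanly with `async def` methods may depend on the `spaces` version):

import spaces

@spaces.GPU(duration=120)   # request up to 120 seconds of GPU time per call
def build_embeddings(texts):
    # Placeholder body: embed the texts on the attached GPU.
    ...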
@@ -85,6 +88,7 @@ class BSIChatbot:
             # Load existing vector store
             self.vectorstore = FAISS.load_local(os.path.join(self.docs, "_embeddings"), self.embedding_model, allow_dangerous_deserialization=True)
 
+    @spaces.GPU
     async def retrieve_similar_embedding(self, query: str):
         if self.vectorstore is None:
             self.vectorstore = FAISS.load_local(os.path.join(self.docs, "_embeddings"), self.embedding_model,
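The `if self.vectorstore is None` guard above is a lazy-loading pattern: the persisted index is only deserialized the first time a query arrives. A compact sketch of the same idea, with hypothetical names:

import os
from langchain_community.vectorstores import FAISS

class LazyRetriever:
    def __init__(self, docs_dir, embeddings):
        self.docs_dir = docs_dir
        self.embeddings = embeddings
        self.vectorstore = None            # not loaded until first use

    def search(self, query, k=20):
        if self.vectorstore is None:       # load the index exactly once
            self.vectorstore = FAISS.load_local(
                os.path.join(self.docs_dir, "_embeddings"),
                self.embeddings,
                allow_dangerous_deserialization=True,
            )
        return self.vectorstore.similarity_search(query, k=k)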
@@ -93,6 +97,7 @@ class BSIChatbot:
         query = f"Instruct: Given a search query, retrieve the relevant passages that answer the query\nQuery:{query}"
         return self.vectorstore.similarity_search(query=query, k=20)
 
+    @spaces.GPU
     async def initialize_llm(self):
         bnb_config = BitsAndBytesConfig(load_in_8bit=True)
         llm = AutoModelForCausalLM.from_pretrained(self.llm_path, quantization_config=bnb_config)
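`initialize_llm` goes through the standard bitsandbytes route for 8-bit loading; a sketch, assuming a CUDA runtime and a hypothetical model id:

from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(load_in_8bit=True)   # quantize linear weights to int8 at load time
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.1-8B-Instruct",              # hypothetical model id
    quantization_config=bnb_config,
    device_map="auto",                               # let accelerate place layers on the GPU
)
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.1-8B-Instruct")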
@@ -108,6 +113,7 @@ class BSIChatbot:
             max_new_tokens=500,
         )
 
+    @spaces.GPU
     async def rag_prompt(self, query: str, rerank: bool, history: List[Dict]):
         retrieved_chunks = await self.retrieve_similar_embedding(query)
         retrieved_texts = [f"{chunk.metadata['source']}:\n{chunk.page_content}" for chunk in retrieved_chunks]
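`rag_prompt` joins each chunk's source name with its text before prompting the model; a minimal sketch of that assembly step, assuming LangChain `Document` objects with `metadata` and `page_content`:

def build_prompt(query, chunks):
    # One block per retrieved chunk: "source:\n<text>".
    context = "\n\n".join(f"{c.metadata['source']}:\n{c.page_content}" for c in chunks)
    # Hypothetical instruction text; the real template lives in rag_prompt.
    return f"Answer using only the context below.\n\n{context}\n\nQuestion: {query}"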
@@ -125,6 +131,7 @@ class BSIChatbot:
         response = await self._generate_response_async(final_prompt)
         return response
 
+    @spaces.GPU
     async def _generate_response_async(self, final_prompt: str):
         loop = asyncio.get_event_loop()
         tokens = await loop.run_in_executor(None, self.llmpipeline, final_prompt)