MikeMann committed on
Commit dbb4df0 · 1 Parent(s): 9b322ba

removing Thread

Files changed (1)
  1. app.py +10 -3
app.py CHANGED
@@ -1,4 +1,8 @@
  import os
+ os.environ["CUDA_VISIBLE_DEVICES"] = "" # Disable CUDA initialization
+ os.environ["allow_dangerous_deserialization"] = "True"
+
+ import spaces
  import asyncio
  import sys
  from typing import List, Dict
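
Note on this hunk: the two environment assignments and the new import spaces move to the very top of the file. A minimal sketch of why the ordering matters, assuming nothing beyond the lines shown here: CUDA_VISIBLE_DEVICES is only honored if it is set before CUDA is first initialized, so it has to precede any import that can touch the GPU, and on a ZeroGPU Space the spaces package is conventionally imported before the heavy ML libraries. The torch import below is illustrative, not part of this hunk.

import os

# hide all GPUs from anything imported later; must happen before CUDA is initialized
os.environ["CUDA_VISIBLE_DEVICES"] = ""

import spaces   # ZeroGPU helper; imported before torch/transformers by convention
import torch    # illustrative heavy import, not part of this hunk

print(torch.cuda.is_available())   # False in the main process, since no device is visible
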
@@ -17,8 +21,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
  from huggingface_hub import login
 
  # Environment setup
- os.environ["CUDA_VISIBLE_DEVICES"] = "" # Disable CUDA initialization
- os.environ["allow_dangerous_deserialization"] = "True"
+
 
  HF_KEY = os.getenv('Gated_Repo')
  embedding_path = "/home/user/app/docs/_embeddings/index.faiss"
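
HF_KEY = os.getenv('Gated_Repo') pairs with the from huggingface_hub import login line above it. The snippet below is a sketch of how such a token is typically used; the login() call itself is not part of this hunk, so treat it as an assumption about the rest of app.py.

import os
from huggingface_hub import login

hf_key = os.getenv("Gated_Repo")   # Space secret holding a Hugging Face access token
if hf_key:
    login(token=hf_key)            # authenticate so gated model repos can be downloaded
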
@@ -39,7 +42,7 @@ class BSIChatbot:
  self.llm_path = model_paths['llm_path']
  self.word_and_embed_model_path = model_paths['embed_model_path']
  self.docs = docs_path
-
+ @spaces.GPU
  async def initialize_embedding_model(self, rebuild_embeddings: bool):
  raw_knowledge_base = []
 
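
@spaces.GPU is the ZeroGPU mechanism this commit leans on: a GPU is attached only while the decorated call runs, instead of keeping a device (or a background Thread) alive for the whole process. A minimal sketch of the pattern with a placeholder function body; an optional duration= argument can bound the allocation, and whether the decorator behaves identically on async def methods as on plain functions is not shown by this diff.

import spaces
import torch

@spaces.GPU   # a GPU is allocated for the duration of each call to this function
def embed(texts):
    # on a ZeroGPU Space a device is expected to be attached for this call,
    # even though the main process started without one visible
    device = "cuda" if torch.cuda.is_available() else "cpu"
    return [(t, device) for t in texts]   # placeholder work standing in for the real model
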
@@ -85,6 +88,7 @@
  # Load existing vector store
  self.vectorstore = FAISS.load_local(os.path.join(self.docs, "_embeddings"), self.embedding_model, allow_dangerous_deserialization=True)
 
+ @spaces.GPU
  async def retrieve_similar_embedding(self, query: str):
  if self.vectorstore is None:
  self.vectorstore = FAISS.load_local(os.path.join(self.docs, "_embeddings"), self.embedding_model,
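
The FAISS calls in this hunk follow LangChain's save/load round-trip. Below is a compact sketch of that API, assuming FAISS comes from langchain_community and a Hugging Face embedding model is used; apart from the _embeddings folder name, the model name and paths are placeholders.

from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# build the index once and persist it next to the documents
store = FAISS.from_texts(["example document"], embeddings)
store.save_local("docs/_embeddings")

# reload later; the flag acknowledges that the pickled metadata in the folder is trusted
store = FAISS.load_local("docs/_embeddings", embeddings, allow_dangerous_deserialization=True)
hits = store.similarity_search("example query", k=20)
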
@@ -93,6 +97,7 @@
  query = f"Instruct: Given a search query, retrieve the relevant passages that answer the query\nQuery:{query}"
  return self.vectorstore.similarity_search(query=query, k=20)
 
+ @spaces.GPU
  async def initialize_llm(self):
  bnb_config = BitsAndBytesConfig(load_in_8bit=True)
  llm = AutoModelForCausalLM.from_pretrained(self.llm_path, quantization_config=bnb_config)
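
initialize_llm loads the causal LM in 8-bit through bitsandbytes. A sketch of that loading path with a placeholder model id standing in for self.llm_path; device_map="auto" is an assumption, not something visible in the hunk.

from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_id = "meta-llama/Llama-3.1-8B-Instruct"   # placeholder for self.llm_path
bnb_config = BitsAndBytesConfig(load_in_8bit=True)

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,   # weights quantized to int8 by bitsandbytes at load time
    device_map="auto",                # assumption: place layers on the GPU granted by @spaces.GPU
)
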
@@ -108,6 +113,7 @@
  max_new_tokens=500,
  )
 
+ @spaces.GPU
  async def rag_prompt(self, query: str, rerank: bool, history: List[Dict]):
  retrieved_chunks = await self.retrieve_similar_embedding(query)
  retrieved_texts = [f"{chunk.metadata['source']}:\n{chunk.page_content}" for chunk in retrieved_chunks]
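
rag_prompt turns the retrieved chunks into source-labelled context before generation. The sketch below reuses the metadata['source'] / page_content access pattern from the hunk; the surrounding prompt template is an illustration, since the real template is not part of this diff.

def build_context(retrieved_chunks) -> str:
    retrieved_texts = [
        f"{chunk.metadata['source']}:\n{chunk.page_content}" for chunk in retrieved_chunks
    ]
    return "\n\n".join(retrieved_texts)

def build_prompt(query: str, retrieved_chunks) -> str:
    return (
        "Answer the question using only the context below.\n\n"   # illustrative instruction
        f"Context:\n{build_context(retrieved_chunks)}\n\n"
        f"Question: {query}\nAnswer:"
    )
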
@@ -125,6 +131,7 @@
  response = await self._generate_response_async(final_prompt)
  return response
 
+ @spaces.GPU
  async def _generate_response_async(self, final_prompt: str):
  loop = asyncio.get_event_loop()
  tokens = await loop.run_in_executor(None, self.llmpipeline, final_prompt)
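
In line with the commit title, blocking generation stays off a manual Thread: _generate_response_async hands the pipeline call to the event loop's default executor. A runnable sketch of that pattern with a small placeholder model; only the run_in_executor(None, ...) shape comes from the diff.

import asyncio
from functools import partial
from transformers import pipeline

generator = pipeline("text-generation", model="gpt2")   # placeholder for self.llmpipeline

async def generate_async(prompt: str) -> str:
    loop = asyncio.get_event_loop()
    call = partial(generator, prompt, max_new_tokens=20)
    # run the blocking pipeline call in the default ThreadPoolExecutor so the event loop stays free
    result = await loop.run_in_executor(None, call)
    return result[0]["generated_text"]

print(asyncio.run(generate_async("Hello")))
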
 