removing Thread
app.py CHANGED
@@ -1,4 +1,8 @@
 import os
+os.environ["CUDA_VISIBLE_DEVICES"] = ""  # Disable CUDA initialization
+os.environ["allow_dangerous_deserialization"] = "True"
+
+import spaces
 import asyncio
 import sys
 from typing import List, Dict
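For context, `spaces` is the Hugging Face helper package behind ZeroGPU Spaces: code that needs a GPU is wrapped in `@spaces.GPU`, and a device is attached only while the decorated call runs. A minimal, self-contained sketch of that pattern (illustrative only, not this app's code):

import spaces   # Hugging Face ZeroGPU helper package
import torch

@spaces.GPU     # a GPU is attached only for the duration of this call
def matmul_demo() -> float:
    # Any CUDA work belongs inside the decorated function.
    x = torch.randn(1024, 1024, device="cuda")
    return (x @ x).sum().item()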
@@ -17,8 +21,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 from huggingface_hub import login
 
 # Environment setup
-
-os.environ["allow_dangerous_deserialization"] = "True"
+
 
 HF_KEY = os.getenv('Gated_Repo')
 embedding_path = "/home/user/app/docs/_embeddings/index.faiss"
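One caveat worth noting: as far as I know, LangChain's FAISS wrapper only honors the `allow_dangerous_deserialization` keyword argument to `load_local` (which this file already passes further down); it does not read an environment variable of that name, so the `os.environ` line above is likely a no-op for FAISS itself. A sketch of the keyword form, assuming a recent `langchain_community` layout:

from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings()        # defaults to a small sentence-transformers model
store = FAISS.load_local(
    "docs/_embeddings",                     # hypothetical index folder
    embeddings,
    allow_dangerous_deserialization=True,   # opt in to unpickling the stored index
)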
@@ -39,7 +42,7 @@ class BSIChatbot:
         self.llm_path = model_paths['llm_path']
         self.word_and_embed_model_path = model_paths['embed_model_path']
         self.docs = docs_path
-
+    @spaces.GPU
     async def initialize_embedding_model(self, rebuild_embeddings: bool):
         raw_knowledge_base = []
 
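`@spaces.GPU` also accepts a duration hint when the default time budget is too short; a sketch, assuming the documented `spaces.GPU(duration=...)` form (whether it composes cleanly with `async def` methods may depend on the `spaces` version):

import spaces

@spaces.GPU(duration=120)   # request up to 120 seconds of GPU time per call
def build_embeddings(texts):
    # Placeholder body: embed the texts on the attached GPU.
    ...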
@@ -85,6 +88,7 @@ class BSIChatbot:
             # Load existing vector store
             self.vectorstore = FAISS.load_local(os.path.join(self.docs, "_embeddings"), self.embedding_model, allow_dangerous_deserialization=True)
 
+    @spaces.GPU
     async def retrieve_similar_embedding(self, query: str):
         if self.vectorstore is None:
             self.vectorstore = FAISS.load_local(os.path.join(self.docs, "_embeddings"), self.embedding_model,
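The `if self.vectorstore is None` guard above is a lazy-loading pattern: the persisted index is only deserialized the first time a query arrives. A compact sketch of the same idea, with hypothetical names:

import os
from langchain_community.vectorstores import FAISS

class LazyRetriever:
    def __init__(self, docs_dir, embeddings):
        self.docs_dir = docs_dir
        self.embeddings = embeddings
        self.vectorstore = None            # not loaded until first use

    def search(self, query, k=20):
        if self.vectorstore is None:       # load the index exactly once
            self.vectorstore = FAISS.load_local(
                os.path.join(self.docs_dir, "_embeddings"),
                self.embeddings,
                allow_dangerous_deserialization=True,
            )
        return self.vectorstore.similarity_search(query, k=k)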
@@ -93,6 +97,7 @@ class BSIChatbot:
         query = f"Instruct: Given a search query, retrieve the relevant passages that answer the query\nQuery:{query}"
         return self.vectorstore.similarity_search(query=query, k=20)
 
+    @spaces.GPU
     async def initialize_llm(self):
         bnb_config = BitsAndBytesConfig(load_in_8bit=True)
         llm = AutoModelForCausalLM.from_pretrained(self.llm_path, quantization_config=bnb_config)
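`initialize_llm` goes through the standard bitsandbytes route for 8-bit loading; a sketch, assuming a CUDA runtime and a hypothetical model id:

from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(load_in_8bit=True)   # quantize linear weights to int8 at load time
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.1-8B-Instruct",              # hypothetical model id
    quantization_config=bnb_config,
    device_map="auto",                               # let accelerate place layers on the GPU
)
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.1-8B-Instruct")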
@@ -108,6 +113,7 @@ class BSIChatbot:
             max_new_tokens=500,
         )
 
+    @spaces.GPU
     async def rag_prompt(self, query: str, rerank: bool, history: List[Dict]):
         retrieved_chunks = await self.retrieve_similar_embedding(query)
         retrieved_texts = [f"{chunk.metadata['source']}:\n{chunk.page_content}" for chunk in retrieved_chunks]
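`rag_prompt` joins each chunk's source name with its text before prompting the model; a minimal sketch of that assembly step, assuming LangChain `Document` objects with `metadata` and `page_content`:

def build_prompt(query, chunks):
    # One block per retrieved chunk: "source:\n<text>".
    context = "\n\n".join(f"{c.metadata['source']}:\n{c.page_content}" for c in chunks)
    # Hypothetical instruction text; the real template lives in rag_prompt.
    return f"Answer using only the context below.\n\n{context}\n\nQuestion: {query}"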
@@ -125,6 +131,7 @@ class BSIChatbot:
         response = await self._generate_response_async(final_prompt)
         return response
 
+    @spaces.GPU
     async def _generate_response_async(self, final_prompt: str):
         loop = asyncio.get_event_loop()
         tokens = await loop.run_in_executor(None, self.llmpipeline, final_prompt)