app.py CHANGED

@@ -34,7 +34,7 @@ class BSIChatbot:
         self.docs = docs_path
         self.rerank_model_path = model_paths['rerank_model_path']
 
-
+    @spaces.GPU
     def initialize_embedding_model(self, rebuild_embeddings: bool):
         raw_knowledge_base = []
 
@@ -80,10 +80,12 @@ class BSIChatbot:
         # Load existing vector store
         self.vectorstore = FAISS.load_local(os.path.join(self.docs, "_embeddings"), self.embedding_model)
 
+    @spaces.GPU
     def retrieve_similar_embedding(self, query: str):
         query = f"Instruct: Given a search query, retrieve the relevant passages that answer the query\nQuery:{query}"
         return self.vectorstore.similarity_search(query=query, k=20)
 
+    @spaces.GPU
     def initialize_llm(self):
         bnb_config = BitsAndBytesConfig(load_in_8bit=True)
         llm = AutoModelForCausalLM.from_pretrained(self.llm_path, quantization_config=bnb_config)
@@ -102,7 +104,7 @@ class BSIChatbot:
             max_new_tokens=500,
         )
 
-
+    @spaces.GPU
     def rag_prompt(self, query: str, rerank: bool, history: List[Dict]):
         retrieved_chunks = self.retrieve_similar_embedding(query)
         retrieved_texts = [f"{chunk.metadata['source']}:\n{chunk.page_content}" for chunk in retrieved_chunks]
@@ -125,7 +127,7 @@ class BSIChatbot:
 
         return self.streamer
 
-
+
     def launch_interface(self):
         with gr.Blocks() as demo:
            chatbot = gr.Chatbot(type="messages")
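The commit marks every GPU-touching method of BSIChatbot with `@spaces.GPU`. On Hugging Face ZeroGPU Spaces no GPU is attached to the process by default; CUDA becomes available only while a `@spaces.GPU`-decorated function is executing, so embedding construction, retrieval, LLM loading, and generation each need the decorator (and app.py needs `import spaces` at the top). A minimal sketch of the pattern, with an illustrative model:

```python
import spaces
from transformers import pipeline

# On ZeroGPU, moving weights to CUDA at import time is deferred; the
# transfer actually happens once a @spaces.GPU function holds the device.
pipe = pipeline("text-generation", model="distilgpt2")  # illustrative model
pipe.model.to("cuda")

@spaces.GPU  # a GPU is attached for the duration of this call
def generate(prompt: str) -> str:
    return pipe(prompt, max_new_tokens=50)[0]["generated_text"]
```

Calls that need more than the default window can ask for it with `@spaces.GPU(duration=120)`.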
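Two details of the retrieval path are worth spelling out. The query is wrapped in an instruction prefix before the search, which follows the convention of instruction-tuned embedding models (the E5 family and similar): queries are embedded with an `Instruct: ...\nQuery: ...` preamble while the stored passages are embedded bare. Separately, recent langchain-community releases refuse to load a pickled FAISS index unless `allow_dangerous_deserialization=True` is passed to `FAISS.load_local`, so the load shown in the diff may need that flag depending on the pinned version. A sketch under those assumptions (the embedding model name is illustrative):

```python
import os
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

embeddings = HuggingFaceEmbeddings(model_name="intfloat/multilingual-e5-large")  # illustrative

# Newer langchain-community versions require opting in to pickle loading.
vectorstore = FAISS.load_local(
    os.path.join("docs", "_embeddings"),
    embeddings,
    allow_dangerous_deserialization=True,
)

query = "What does BSI require for password policies?"  # illustrative
prefixed = (
    "Instruct: Given a search query, retrieve the relevant passages "
    f"that answer the query\nQuery:{query}"
)
chunks = vectorstore.similarity_search(query=prefixed, k=20)
```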
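`initialize_llm` loads the model with bitsandbytes 8-bit quantization (`load_in_8bit=True`), which roughly halves memory against fp16 at a small quality cost; on a shared ZeroGPU slice that headroom is often what lets the model fit at all. A sketch of the loading pattern; the model id and `device_map` are illustrative, since the diff only shows the quantization config:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_id = "meta-llama/Llama-3.1-8B-Instruct"  # illustrative; the app uses a local path

bnb_config = BitsAndBytesConfig(load_in_8bit=True)  # LLM.int8(): ~1 byte per weight

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",  # let accelerate place the quantized layers on the GPU
)
```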
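`rag_prompt` hands back `self.streamer` rather than a finished string, which is the standard transformers streaming recipe: `generate()` blocks, so it runs on a background thread that feeds a `TextIteratorStreamer`, and the caller iterates over tokens as they arrive. A sketch of that recipe, assuming `self.streamer` is a `TextIteratorStreamer` (its construction is not part of this diff):

```python
from threading import Thread
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

model_id = "distilgpt2"  # illustrative
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

def stream_answer(prompt: str):
    inputs = tokenizer(prompt, return_tensors="pt")
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    # generate() blocks until done, so run it on a worker thread and
    # consume the streamer from this side as tokens arrive.
    kwargs = dict(inputs, streamer=streamer, max_new_tokens=500)
    Thread(target=model.generate, kwargs=kwargs).start()
    partial = ""
    for new_text in streamer:
        partial += new_text
        yield partial
```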
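`launch_interface` builds the UI with `gr.Blocks` and a messages-style `gr.Chatbot`, so history travels as a list of `{"role": ..., "content": ...}` dicts, the same shape `rag_prompt` takes for its `history` argument. A minimal sketch of how such a Blocks chat loop is commonly wired; the handler and component names below are assumptions, not the app's actual code:

```python
import gradio as gr

def respond(message, history):
    # history arrives as a list of {"role": ..., "content": ...} dicts
    history = history + [{"role": "user", "content": message}]
    answer = "..."  # the app would consume the streamer returned by rag_prompt here
    history.append({"role": "assistant", "content": answer})
    return "", history

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(type="messages")
    msg = gr.Textbox(placeholder="Ask about the BSI documents")
    msg.submit(respond, [msg, chatbot], [msg, chatbot])

demo.launch()
```

Because Gradio event handlers may be generators, the streaming recipe above can be plugged in directly by yielding the growing history on each token.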