Spaces:
Sleeping
Sleeping
christian
committed on
Commit
·
62944f8
1
Parent(s):
3e0aec0
testing gradio for interface
Browse files- app.py +541 -132
- requirements.txt +7 -0
app.py
CHANGED
@@ -176,151 +176,560 @@
|
|
176 |
# start_server()
|
177 |
|
178 |
|
179 |
-
#!/usr/bin/env python3
|
180 |
-
"""
|
181 |
-
HF Spaces deployment launcher for RAG Chatbot
|
182 |
-
Repository structure: rag_app/ is the git root
|
183 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
184 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
185 |
import os
|
186 |
import sys
|
187 |
-
import
|
188 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
189 |
|
190 |
-
import os
|
191 |
|
192 |
-
|
193 |
-
|
194 |
-
os.environ["HF_HOME"] = "/tmp/hf_cache"
|
195 |
-
os.makedirs("/tmp/hf_cache", exist_ok=True)
|
196 |
-
|
197 |
-
|
198 |
-
print("🚀 Starting HF Spaces deployment setup...")
|
199 |
-
print(f"📁 Current directory: {os.getcwd()}")
|
200 |
-
print(f"📂 Contents: {os.listdir('.')}")
|
201 |
-
|
202 |
-
# Ensure current directory is in Python path
|
203 |
-
sys.path.insert(0, os.getcwd())
|
204 |
-
|
205 |
-
# HF Spaces writable path for ephemeral storage
|
206 |
-
TMP_VECTOR_STORE_ROOT = "/tmp/vector_stores"
|
207 |
-
|
208 |
-
|
209 |
-
def setup_for_spaces():
|
210 |
-
"""Setup vector stores and environment for HF Spaces"""
|
211 |
-
print("🔧 Setting up vector stores for HF Spaces...")
|
212 |
-
|
213 |
-
# Ensure docs folders exist in repo
|
214 |
-
required_dirs = [
|
215 |
-
"./docs",
|
216 |
-
"./docs/mes",
|
217 |
-
"./docs/technical",
|
218 |
-
"./docs/general"
|
219 |
-
]
|
220 |
-
for directory in required_dirs:
|
221 |
-
os.makedirs(directory, exist_ok=True)
|
222 |
-
exists = "✅" if os.path.exists(directory) else "❌"
|
223 |
-
print(f"{exists} Directory: {directory}")
|
224 |
-
|
225 |
-
# Map of vector stores (persist dirs now point to /tmp)
|
226 |
-
store_configs = [
|
227 |
-
("MES Manual", "docs/mes", os.path.join(TMP_VECTOR_STORE_ROOT, "mes_db")),
|
228 |
-
("Technical Docs", "docs/technical",
|
229 |
-
os.path.join(TMP_VECTOR_STORE_ROOT, "tech_db")),
|
230 |
-
("General Docs", "docs/general",
|
231 |
-
os.path.join(TMP_VECTOR_STORE_ROOT, "general_db")),
|
232 |
-
]
|
233 |
-
|
234 |
-
stores_to_build = []
|
235 |
-
for name, doc_path, persist_dir in store_configs:
|
236 |
-
# Check if store already exists in repo or in /tmp
|
237 |
-
if os.path.exists(persist_dir) and os.listdir(persist_dir):
|
238 |
-
print(f"✅ {name} vector store already exists in {persist_dir}")
|
239 |
-
else:
|
240 |
-
stores_to_build.append((name, doc_path, persist_dir))
|
241 |
-
print(f"🔧 {name} vector store needs building in {persist_dir}")
|
242 |
-
|
243 |
-
# Build missing vector stores
|
244 |
-
if stores_to_build:
|
245 |
-
print(f"🏗️ Building {len(stores_to_build)} vector store(s)...")
|
246 |
-
start_time = time.time()
|
247 |
-
MAX_BUILD_TIME = 600 # seconds
|
248 |
-
|
249 |
-
try:
|
250 |
-
from utils.vector_store import build_vector_store
|
251 |
-
print("✅ Vector store utilities imported successfully")
|
252 |
-
|
253 |
-
for name, doc_path, persist_dir in stores_to_build:
|
254 |
-
elapsed = time.time() - start_time
|
255 |
-
if elapsed > MAX_BUILD_TIME:
|
256 |
-
print(
|
257 |
-
f"⏰ Build time limit reached ({elapsed:.1f}s), creating empty store at {persist_dir}")
|
258 |
-
os.makedirs(persist_dir, exist_ok=True)
|
259 |
-
continue
|
260 |
-
|
261 |
-
if os.path.exists(doc_path):
|
262 |
-
doc_files = [f for f in Path(doc_path).rglob(
|
263 |
-
"*") if f.is_file() and not f.name.startswith('.')]
|
264 |
-
if doc_files:
|
265 |
-
print(
|
266 |
-
f"📄 Found {len(doc_files)} document(s) for {name}")
|
267 |
-
try:
|
268 |
-
build_vector_store(
|
269 |
-
doc_path=doc_path, persist_directory=persist_dir)
|
270 |
-
print(f"✅ {name} built successfully")
|
271 |
-
except Exception as e:
|
272 |
-
print(f"❌ Error building {name}: {str(e)}")
|
273 |
-
os.makedirs(persist_dir, exist_ok=True)
|
274 |
-
else:
|
275 |
-
print(f"⚠️ No documents found in {doc_path}")
|
276 |
-
os.makedirs(persist_dir, exist_ok=True)
|
277 |
-
else:
|
278 |
-
print(f"⚠️ Document path not found: {doc_path}")
|
279 |
-
os.makedirs(persist_dir, exist_ok=True)
|
280 |
-
|
281 |
-
except ImportError as e:
|
282 |
-
print(f"❌ Could not import vector store utilities: {e}")
|
283 |
-
for _, _, persist_dir in stores_to_build:
|
284 |
-
os.makedirs(persist_dir, exist_ok=True)
|
285 |
-
|
286 |
-
else:
|
287 |
-
print("✅ All vector stores already exist!")
|
288 |
-
|
289 |
-
print("🎉 Vector store setup completed!")
|
290 |
-
|
291 |
-
|
292 |
-
def start_server():
|
293 |
-
"""Start the FastAPI server"""
|
294 |
-
print("🌐 Starting FastAPI server...")
|
295 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
296 |
try:
|
297 |
-
|
298 |
-
|
299 |
|
300 |
-
|
301 |
|
302 |
-
|
303 |
-
|
|
|
304 |
|
305 |
-
|
306 |
-
|
307 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
308 |
|
309 |
-
except ImportError as e:
|
310 |
-
print(f"❌ Could not import FastAPI app: {e}")
|
311 |
-
sys.exit(1)
|
312 |
except Exception as e:
|
313 |
-
print(f"
|
314 |
-
|
315 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
316 |
|
317 |
-
if __name__ == "__main__":
|
318 |
-
print("=" * 60)
|
319 |
-
print("🎯 RAG Chatbot - HF Spaces Deployment")
|
320 |
-
print("=" * 60)
|
321 |
|
322 |
-
|
323 |
-
|
|
|
|
|
|
|
324 |
|
325 |
-
|
326 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
176 |
# start_server()
|
177 |
|
178 |
|
179 |
+
# #!/usr/bin/env python3
|
180 |
+
# """
|
181 |
+
# HF Spaces deployment launcher for RAG Chatbot
|
182 |
+
# Repository structure: rag_app/ is the git root
|
183 |
+
# """
|
184 |
+
|
185 |
+
# import os
|
186 |
+
# import sys
|
187 |
+
# import time
|
188 |
+
# from pathlib import Path
|
189 |
+
|
190 |
+
# import os
|
191 |
+
|
192 |
+
# # HF Spaces only allows writing to /tmp
|
193 |
+
# os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf_cache"
|
194 |
+
# os.environ["HF_HOME"] = "/tmp/hf_cache"
|
195 |
+
# os.makedirs("/tmp/hf_cache", exist_ok=True)
|
196 |
+
|
197 |
+
|
198 |
+
# print("🚀 Starting HF Spaces deployment setup...")
|
199 |
+
# print(f"📁 Current directory: {os.getcwd()}")
|
200 |
+
# print(f"📂 Contents: {os.listdir('.')}")
|
201 |
+
|
202 |
+
# # Ensure current directory is in Python path
|
203 |
+
# sys.path.insert(0, os.getcwd())
|
204 |
+
|
205 |
+
# # HF Spaces writable path for ephemeral storage
|
206 |
+
# TMP_VECTOR_STORE_ROOT = "/tmp/vector_stores"
|
207 |
+
|
208 |
+
|
209 |
+
# def setup_for_spaces():
|
210 |
+
# """Setup vector stores and environment for HF Spaces"""
|
211 |
+
# print("🔧 Setting up vector stores for HF Spaces...")
|
212 |
+
|
213 |
+
# # Ensure docs folders exist in repo
|
214 |
+
# required_dirs = [
|
215 |
+
# "./docs",
|
216 |
+
# "./docs/mes",
|
217 |
+
# "./docs/technical",
|
218 |
+
# "./docs/general"
|
219 |
+
# ]
|
220 |
+
# for directory in required_dirs:
|
221 |
+
# os.makedirs(directory, exist_ok=True)
|
222 |
+
# exists = "✅" if os.path.exists(directory) else "❌"
|
223 |
+
# print(f"{exists} Directory: {directory}")
|
224 |
+
|
225 |
+
# # Map of vector stores (persist dirs now point to /tmp)
|
226 |
+
# store_configs = [
|
227 |
+
# ("MES Manual", "docs/mes", os.path.join(TMP_VECTOR_STORE_ROOT, "mes_db")),
|
228 |
+
# ("Technical Docs", "docs/technical",
|
229 |
+
# os.path.join(TMP_VECTOR_STORE_ROOT, "tech_db")),
|
230 |
+
# ("General Docs", "docs/general",
|
231 |
+
# os.path.join(TMP_VECTOR_STORE_ROOT, "general_db")),
|
232 |
+
# ]
|
233 |
+
|
234 |
+
# stores_to_build = []
|
235 |
+
# for name, doc_path, persist_dir in store_configs:
|
236 |
+
# # Check if store already exists in repo or in /tmp
|
237 |
+
# if os.path.exists(persist_dir) and os.listdir(persist_dir):
|
238 |
+
# print(f"✅ {name} vector store already exists in {persist_dir}")
|
239 |
+
# else:
|
240 |
+
# stores_to_build.append((name, doc_path, persist_dir))
|
241 |
+
# print(f"🔧 {name} vector store needs building in {persist_dir}")
|
242 |
+
|
243 |
+
# # Build missing vector stores
|
244 |
+
# if stores_to_build:
|
245 |
+
# print(f"🏗️ Building {len(stores_to_build)} vector store(s)...")
|
246 |
+
# start_time = time.time()
|
247 |
+
# MAX_BUILD_TIME = 600 # seconds
|
248 |
+
|
249 |
+
# try:
|
250 |
+
# from utils.vector_store import build_vector_store
|
251 |
+
# print("✅ Vector store utilities imported successfully")
|
252 |
+
|
253 |
+
# for name, doc_path, persist_dir in stores_to_build:
|
254 |
+
# elapsed = time.time() - start_time
|
255 |
+
# if elapsed > MAX_BUILD_TIME:
|
256 |
+
# print(
|
257 |
+
# f"⏰ Build time limit reached ({elapsed:.1f}s), creating empty store at {persist_dir}")
|
258 |
+
# os.makedirs(persist_dir, exist_ok=True)
|
259 |
+
# continue
|
260 |
+
|
261 |
+
# if os.path.exists(doc_path):
|
262 |
+
# doc_files = [f for f in Path(doc_path).rglob(
|
263 |
+
# "*") if f.is_file() and not f.name.startswith('.')]
|
264 |
+
# if doc_files:
|
265 |
+
# print(
|
266 |
+
# f"📄 Found {len(doc_files)} document(s) for {name}")
|
267 |
+
# try:
|
268 |
+
# build_vector_store(
|
269 |
+
# doc_path=doc_path, persist_directory=persist_dir)
|
270 |
+
# print(f"✅ {name} built successfully")
|
271 |
+
# except Exception as e:
|
272 |
+
# print(f"❌ Error building {name}: {str(e)}")
|
273 |
+
# os.makedirs(persist_dir, exist_ok=True)
|
274 |
+
# else:
|
275 |
+
# print(f"⚠️ No documents found in {doc_path}")
|
276 |
+
# os.makedirs(persist_dir, exist_ok=True)
|
277 |
+
# else:
|
278 |
+
# print(f"⚠️ Document path not found: {doc_path}")
|
279 |
+
# os.makedirs(persist_dir, exist_ok=True)
|
280 |
+
|
281 |
+
# except ImportError as e:
|
282 |
+
# print(f"❌ Could not import vector store utilities: {e}")
|
283 |
+
# for _, _, persist_dir in stores_to_build:
|
284 |
+
# os.makedirs(persist_dir, exist_ok=True)
|
285 |
+
|
286 |
+
# else:
|
287 |
+
# print("✅ All vector stores already exist!")
|
288 |
+
|
289 |
+
# print("🎉 Vector store setup completed!")
|
290 |
|
291 |
+
|
292 |
+
# def start_server():
|
293 |
+
# """Start the FastAPI server"""
|
294 |
+
# print("🌐 Starting FastAPI server...")
|
295 |
+
|
296 |
+
# try:
|
297 |
+
# from api.main import app
|
298 |
+
# print("✅ Successfully imported FastAPI app from api.main")
|
299 |
+
|
300 |
+
# import uvicorn
|
301 |
+
|
302 |
+
# port = int(os.environ.get("PORT", 7860))
|
303 |
+
# host = "0.0.0.0"
|
304 |
+
|
305 |
+
# print(f"🚀 Starting server on {host}:{port}")
|
306 |
+
# uvicorn.run(app, host=host, port=port,
|
307 |
+
# log_level="info", access_log=True)
|
308 |
+
|
309 |
+
# except ImportError as e:
|
310 |
+
# print(f"❌ Could not import FastAPI app: {e}")
|
311 |
+
# sys.exit(1)
|
312 |
+
# except Exception as e:
|
313 |
+
# print(f"❌ Error starting server: {e}")
|
314 |
+
# sys.exit(1)
|
315 |
+
|
316 |
+
|
317 |
+
# if __name__ == "__main__":
|
318 |
+
# print("=" * 60)
|
319 |
+
# print("🎯 RAG Chatbot - HF Spaces Deployment")
|
320 |
+
# print("=" * 60)
|
321 |
+
|
322 |
+
# # Setup phase
|
323 |
+
# setup_for_spaces()
|
324 |
+
|
325 |
+
# # Server start phase
|
326 |
+
# start_server()
|
327 |
+
|
328 |
+
|
329 |
+
# app.py - Pure Gradio approach (for Gradio template)
|
330 |
+
from fastapi import Request
|
331 |
+
import requests
|
332 |
+
from dotenv import load_dotenv
|
333 |
+
from utils.vector_store import get_vector_store
|
334 |
+
from pydantic import BaseModel
|
335 |
+
from fastapi import FastAPI, HTTPException, Request
|
336 |
import os
|
337 |
import sys
|
338 |
+
import gradio as gr
|
339 |
+
|
340 |
+
from utils.helpers.chat_mapper import map_answer_to_chat_response
|
341 |
+
|
342 |
+
from fastapi.middleware.cors import CORSMiddleware
|
343 |
+
|
344 |
+
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
345 |
+
|
346 |
+
load_dotenv()
|
347 |
+
|
348 |
+
app = FastAPI()

# Simplified CORS for debugging
# NOTE(review): wildcard origins combined with allow_credentials=True is not
# honored by browsers under the CORS spec — tighten origins before production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Vector store mapping for different domains.
# Keys double as query prefixes ("mes:", "technical:", "general:");
# "default" is used when the query carries no prefix.
VECTOR_STORE_PATHS = {
    "mes": "./vector_stores/mes_db",
    "technical": "./vector_stores/tech_db",
    "general": "./vector_stores/general_db",
    "default": "./vector_stores/general_db",
}
|
366 |
|
|
|
367 |
|
368 |
+
class QueryRequest(BaseModel):
    """Request body for POST /: a single free-text question.

    The query may carry an optional store prefix ("mes:", "technical:",
    "general:") that routes retrieval to a specific vector store.
    """
    # Raw user question, optionally prefixed with a store selector.
    query: str
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
370 |
|
371 |
+
|
372 |
+
# Gemini API setup.
# Fail fast at import time if the key is missing: every answer requires it.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
if not GEMINI_API_KEY:
    raise ValueError("GEMINI_API_KEY environment variable required")

# REST endpoint for Gemini 2.0 Flash text generation; the API key is
# appended as a query parameter at call time.
GEMINI_API_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent"
|
378 |
+
|
379 |
+
# Vector store loader
|
380 |
+
|
381 |
+
|
382 |
+
def load_vector_store_by_prefix(query: str):
    """Select a vector store from an optional "<prefix>:" at the start of *query*.

    Args:
        query: The raw user question, e.g. "mes: how are orders tracked?".

    Returns:
        A 3-tuple ``(vector_store, cleaned_query, store_key)`` where
        ``cleaned_query`` is the question with any recognized prefix removed
        (original casing preserved) and ``store_key`` is a key of
        ``VECTOR_STORE_PATHS`` ("default" when no prefix matched).
    """
    stripped = query.strip()
    # Match the prefix case-insensitively, but slice from the original
    # (stripped) text so the user's casing survives into retrieval and the
    # prompt sent to Gemini. (Previously the whole lowercased query was
    # returned, silently down-casing the question.)
    lowered = stripped.lower()
    for prefix, path in VECTOR_STORE_PATHS.items():
        if prefix != "default" and lowered.startswith(f"{prefix}:"):
            cleaned_query = stripped[len(prefix) + 1:].strip()
            return get_vector_store(persist_directory=path), cleaned_query, prefix
    return get_vector_store(persist_directory=VECTOR_STORE_PATHS["default"]), query, "default"
|
389 |
+
|
390 |
+
|
391 |
+
def generate_answer_with_gemini(query: str, context_docs: list):
    """Generate an answer to *query* grounded in *context_docs* via the Gemini REST API.

    Args:
        query: The user's question (already stripped of any store prefix).
        context_docs: Retrieved documents; each must expose ``page_content``.

    Returns:
        The generated answer text, or a human-readable error string on HTTP
        or network failure, or a fallback message when the model returns no
        usable text (e.g. an empty or safety-blocked response).
    """
    # Build the grounding context from the retrieved documents.
    knowledge_parts = []
    for i, doc in enumerate(context_docs, 1):
        knowledge_parts.append(f"Data Source {i}: {doc.page_content.strip()}")
    knowledge_base = "\n\n".join(knowledge_parts)

    # The updated prompt is more direct and forceful
    prompt = (
        "You are an expert AI assistant that uses a provided knowledge base to answer questions. "
        "Your responses must always be based on this knowledge base, which is the ultimate source of truth. "
        "You will only use your internal knowledge to supplement the answer, never to contradict it. "
        "If and only if the knowledge base contains absolutely nothing relevant to the user's question, "
        "you will respond with a polite and concise statement saying you cannot answer the question from the information you have. "
        "You must never answer 'I don't know' if there is any information in the knowledge base that is even tangentially related to the question. "
        "Always try your best to construct a useful answer by synthesizing the provided information. "
        "Do not refer to the 'knowledge base' or 'sources' directly; instead, use phrases like 'based on the information I have'.\n\n"

        f"My knowledge base:\n{knowledge_base}\n\n"
        f"User's Question: {query}\n\nAnswer:"
    )

    # print the prompt for debugging
    print("Prompt sent to Gemini API:", prompt)

    try:
        response = requests.post(
            f"{GEMINI_API_URL}?key={GEMINI_API_KEY}",
            json={
                "contents": [
                    {
                        "role": "user",
                        "parts": [
                            {"text": prompt}
                        ]
                    }
                ],
                "generationConfig": {
                    "temperature": 0.7,
                    "maxOutputTokens": 300
                }
            },
            timeout=300
        )

        if response.status_code != 200:
            return f"API Error: {response.status_code} - {response.text}"

        data = response.json()

        # Extract the answer text defensively: "candidates" or "parts" can be
        # empty lists (e.g. a safety-blocked response), which previously
        # raised IndexError and surfaced as "Error: list index out of range".
        try:
            text = data["candidates"][0]["content"]["parts"][0]["text"].strip()
        except (KeyError, IndexError, TypeError, AttributeError):
            text = ""
        return text or "I couldn't generate an answer."

    except Exception as e:
        # Broad by design: network errors, timeouts, malformed JSON — the
        # caller renders this string directly to the user.
        return f"Error: {str(e)}"
|
453 |
+
|
454 |
+
# Middleware for logging requests
|
455 |
+
|
456 |
+
|
457 |
+
@app.middleware("http")
|
458 |
+
async def log_requests(request: Request, call_next):
|
459 |
+
print(f"Request: {request.method} {request.url}")
|
460 |
+
print(f"Headers: {dict(request.headers)}")
|
461 |
+
print(f"Origin: {request.headers.get('origin', 'No Origin')}")
|
462 |
+
print(f"User-Agent: {request.headers.get('user-agent', 'No User-Agent')}")
|
463 |
+
|
464 |
+
response = await call_next(request)
|
465 |
+
print(f"Response Status: {response.status_code}")
|
466 |
+
return response
|
467 |
+
|
468 |
+
# NEW: Gradio interface function
|
469 |
+
|
470 |
+
|
471 |
+
def gradio_chat_interface(query: str) -> str:
    """Answer a user question for the Gradio UI.

    Runs the same retrieval + Gemini pipeline as the POST endpoint and
    renders the answer plus source previews as a single markdown string.
    """
    try:
        if not query.strip():
            return "Please enter a question."

        print(f"Gradio query: {query}")

        # Pick the vector store from an optional "<prefix>:" in the query.
        vector_store, cleaned_query, store_key = load_vector_store_by_prefix(
            query)

        if not vector_store:
            return "Vector store not ready. Please try again later."

        # MMR retrieval: fetch 20 candidates, keep 6 diverse ones.
        retriever = vector_store.as_retriever(
            search_type="mmr",
            search_kwargs={"k": 6, "fetch_k": 20, "lambda_mult": 0.5},
        )
        docs = retriever.get_relevant_documents(cleaned_query)

        # Drop duplicate snippets (first occurrence wins), cap at 5 docs.
        by_snippet = {}
        for doc in docs:
            by_snippet.setdefault(doc.page_content.strip(), doc)
        docs = list(by_snippet.values())[:5]

        if not docs:
            return "I couldn't find any relevant information in the knowledge base to answer your question."

        answer = generate_answer_with_gemini(cleaned_query, docs)

        # Assemble the markdown response: answer first, then sources.
        sections = [f"## Answer\n\n{answer}\n\n"]
        if docs:
            sections.append("## Sources\n\n")
            for i, doc in enumerate(docs, 1):
                source_name = doc.metadata.get('source', 'Unknown Source')
                page = doc.metadata.get('page', '')
                page_info = f" (Page {page})" if page else ""

                content = doc.page_content
                preview = content if len(content) <= 400 else content[:400] + "..."
                sections.append(
                    f"### {i}. {source_name}{page_info}\n\n{preview}\n\n---\n\n")

        return "".join(sections)

    except Exception as e:
        error_msg = f"**Error occurred:**\n\n```\n{str(e)}\n```"
        print(f"Gradio error: {e}")
        return error_msg
|
534 |
+
|
535 |
+
# Create Gradio interface
|
536 |
+
|
537 |
+
|
538 |
+
def create_gradio_interface():
    """Create and configure the Gradio Blocks interface.

    Returns:
        gr.Blocks: the UI, ready to be mounted on the FastAPI app.
    """
    # FIX: gr.Blocks does not accept a `description` keyword (that belongs to
    # gr.Interface); passing it raises TypeError on Gradio 4. The descriptive
    # text already appears in the Markdown header below.
    with gr.Blocks(
        title="RAG Chatbot",
        theme='soft',
    ) as interface:

        gr.Markdown("""
        # RAG Chatbot

        Ask questions about your knowledge base and get detailed answers with sources.

        **Available Knowledge:**
        - MES Manual documentation (prefix with "mes:")
        - Technical documentation (prefix with "technical:")
        - General documentation (prefix with "general:" or no prefix)
        """)

        with gr.Row():
            with gr.Column(scale=4):
                query_input = gr.Textbox(
                    label="Your Question",
                    placeholder="Enter your question here... (e.g., 'What is machine learning?' or 'mes: How does the system work?')",
                    lines=3,
                    max_lines=10
                )
            with gr.Column(scale=1):
                submit_btn = gr.Button(
                    "Ask Question", variant="primary", size="lg")
                clear_btn = gr.Button("Clear", variant="secondary")

        answer_output = gr.Markdown(
            label="Answer & Sources",
            value="Welcome! Ask a question above to get started."
        )

        # Event handlers
        submit_btn.click(
            gradio_chat_interface,
            inputs=[query_input],
            outputs=[answer_output]
        )

        query_input.submit(  # Allow Enter key to submit
            gradio_chat_interface,
            inputs=[query_input],
            outputs=[answer_output]
        )

        clear_btn.click(
            lambda: ("", "Welcome! Ask a question above to get started."),
            outputs=[query_input, answer_output]
        )

        # Example questions
        gr.Examples(
            examples=[
                ["What is machine learning and how does it work?"],
                ["mes: How does the MES system handle production data?"],
                ["technical: Explain the database architecture"],
                ["What are the main components of the system?"],
                ["How do I configure the application settings?"]
            ],
            inputs=[query_input],
            label="Example Questions"
        )

        gr.Markdown("""
        ---

        **Tips:**
        - Use prefixes (mes:, technical:, general:) to search specific knowledge bases
        - Be specific with your questions for better results
        - Sources are provided with each answer for verification

        **Technical Info:**
        - Powered by FastAPI backend
        - Vector search with MMR retrieval
        - Gemini 2.0 Flash for answer generation
        """)

    return interface
|
623 |
+
|
624 |
+
# API Endpoints
|
625 |
+
|
626 |
+
|
627 |
+
@app.get("/")
|
628 |
+
def root():
|
629 |
+
return {
|
630 |
+
"status": "running",
|
631 |
+
"model": "gemini-2.0-flash",
|
632 |
+
"using_direct_api": True,
|
633 |
+
"client_ready": True,
|
634 |
+
"gradio_interface": "/gradio"
|
635 |
+
}
|
636 |
+
|
637 |
+
|
638 |
+
@app.post("/")
|
639 |
+
async def ask_question(request: Request):
|
640 |
+
try:
|
641 |
+
# Print raw incoming request body
|
642 |
+
raw_body = await request.body()
|
643 |
+
print("Incoming POST request body:")
|
644 |
+
print(raw_body.decode("utf-8"))
|
645 |
+
|
646 |
+
# Parse into your Pydantic model
|
647 |
+
parsed_request = QueryRequest.model_validate_json(raw_body)
|
648 |
+
print("Parsed request object:", parsed_request)
|
649 |
+
|
650 |
+
vector_store, cleaned_query, store_key = load_vector_store_by_prefix(
|
651 |
+
parsed_request.query
|
652 |
+
)
|
653 |
+
|
654 |
+
if not vector_store:
|
655 |
+
raise HTTPException(
|
656 |
+
status_code=500, detail="Vector store not ready"
|
657 |
+
)
|
658 |
+
|
659 |
+
retriever = vector_store.as_retriever(
|
660 |
+
search_type="mmr",
|
661 |
+
search_kwargs={
|
662 |
+
"k": 6,
|
663 |
+
"fetch_k": 20,
|
664 |
+
"lambda_mult": 0.5
|
665 |
+
}
|
666 |
+
)
|
667 |
+
|
668 |
+
docs = retriever.get_relevant_documents(cleaned_query)
|
669 |
+
|
670 |
+
# Deduplicate
|
671 |
+
seen = set()
|
672 |
+
unique_docs = []
|
673 |
+
for doc in docs:
|
674 |
+
snippet = doc.page_content.strip()
|
675 |
+
if snippet not in seen:
|
676 |
+
seen.add(snippet)
|
677 |
+
unique_docs.append(doc)
|
678 |
+
docs = unique_docs[:5]
|
679 |
+
|
680 |
+
if not docs:
|
681 |
+
return {
|
682 |
+
"answer": "I couldn't find any relevant information in the knowledge base to answer your question.",
|
683 |
+
"model_used": "gemini-2.0-flash",
|
684 |
+
"vector_store_used": VECTOR_STORE_PATHS[store_key],
|
685 |
+
"sources": []
|
686 |
+
}
|
687 |
+
|
688 |
+
answer = generate_answer_with_gemini(cleaned_query, docs)
|
689 |
+
|
690 |
+
answer_obj = {
|
691 |
+
"answer": answer,
|
692 |
+
"model_used": "gemini-2.0-flash",
|
693 |
+
"vector_store_used": VECTOR_STORE_PATHS[store_key],
|
694 |
+
"sources": [
|
695 |
+
{
|
696 |
+
"content": doc.page_content[:500] + "...\n",
|
697 |
+
"metadata": doc.metadata
|
698 |
+
}
|
699 |
+
for doc in docs
|
700 |
+
]
|
701 |
+
}
|
702 |
+
|
703 |
+
return map_answer_to_chat_response(answer_obj)
|
704 |
|
|
|
|
|
|
|
705 |
except Exception as e:
|
706 |
+
print(f"Error in ask_question: {e}")
|
707 |
+
raise HTTPException(status_code=500, detail=f"Error: {str(e)}")
|
708 |
|
709 |
+
# Create the Gradio UI once at import time so it is ready when the app starts.
demo = create_gradio_interface()

# Mount Gradio on FastAPI at /gradio; mount_gradio_app returns the combined app.
app = gr.mount_gradio_app(app, demo, path="/gradio")

# Add a redirect for convenience
|
716 |
|
|
|
|
|
|
|
|
|
717 |
|
718 |
+
@app.get("/ui")
|
719 |
+
async def redirect_to_gradio():
|
720 |
+
"""Redirect /ui to /gradio for easier access"""
|
721 |
+
from fastapi.responses import RedirectResponse
|
722 |
+
return RedirectResponse(url="/gradio")
|
723 |
|
724 |
+
# Health check endpoint
|
725 |
+
|
726 |
+
|
727 |
+
@app.get("/health")
|
728 |
+
def health_check():
|
729 |
+
return {"status": "healthy", "gradio_mounted": True}
|
730 |
+
|
731 |
+
|
732 |
+
if __name__ == "__main__":
|
733 |
+
import uvicorn
|
734 |
+
port = int(os.environ.get("PORT", 8000))
|
735 |
+
uvicorn.run(app, host="0.0.0.0", port=port)
|
requirements.txt
CHANGED
@@ -46,6 +46,13 @@ pathlib2
|
|
46 |
gunicorn
|
47 |
|
48 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
# fastapi==0.104.1
|
50 |
# uvicorn[standard]==0.24.0
|
51 |
# requests==2.31.0
|
|
|
46 |
gunicorn
|
47 |
|
48 |
|
49 |
+
# NEW: Add Gradio for the web interface
|
50 |
+
gradio>=4.0.0
|
51 |
+
|
52 |
+
# Optional: Gradio Python client (programmatic API access)
|
53 |
+
gradio-client
|
54 |
+
|
55 |
+
|
56 |
# fastapi==0.104.1
|
57 |
# uvicorn[standard]==0.24.0
|
58 |
# requests==2.31.0
|