Spaces:

ChrisSaws
/

mes-chatbot-rag-backend

Running

christian

testing gradio for interface

62944f8 about 1 month ago

24.5 kB

	# #!/usr/bin/env python3
	# """
	# Render.com deployment launcher for RAG Chatbot
	# Repository structure: rag_app/ is the git root
	# """

	# import os
	# import sys
	# import time
	# from pathlib import Path

	# print("🚀 Starting Render deployment setup...")
	# print(f"📁 Current directory: {os.getcwd()}")
	# print(f"📂 Contents: {os.listdir('.')}")

	# # Since we're already in rag_app/, no need to change directories
	# # Just ensure current directory is in Python path
	# sys.path.insert(0, os.getcwd())


	# def setup_for_render():
	# """Setup vector stores and environment for Render deployment"""
	# print("🔧 Setting up vector stores for Render...")

	# # Ensure directories exist (relative to rag_app/)
	# required_dirs = [
	# "./vector_stores",
	# "./docs",
	# "./docs/mes",
	# "./docs/technical",
	# "./docs/general"
	# ]

	# for directory in required_dirs:
	# os.makedirs(directory, exist_ok=True)
	# exists = "✅" if os.path.exists(directory) else "❌"
	# print(f"{exists} Directory: {directory}")

	# # Check existing vector stores
	# store_configs = [
	# ("MES Manual", "docs/mes", "./vector_stores/mes_db"),
	# ("Technical Docs", "docs/technical", "./vector_stores/tech_db"),
	# ("General Docs", "docs/general", "./vector_stores/general_db")
	# ]

	# stores_to_build = []
	# for name, doc_path, persist_dir in store_configs:
	# if os.path.exists(persist_dir) and os.listdir(persist_dir):
	# print(f"✅ {name} vector store already exists")
	# else:
	# stores_to_build.append((name, doc_path, persist_dir))
	# print(f"🔧 {name} vector store needs building")

	# # Build missing vector stores
	# if stores_to_build:
	# print(f"🏗️ Building {len(stores_to_build)} vector store(s)...")

	# # Add timeout to prevent Render build timeout
	# start_time = time.time()
	# MAX_BUILD_TIME = 600 # 10 minutes max for free tier

	# try:
	# # Import your vector store utilities
	# from utils.vector_store import build_vector_store
	# print("✅ Vector store utilities imported successfully")

	# for name, doc_path, persist_dir in stores_to_build:
	# # Check build time limit
	# elapsed = time.time() - start_time
	# if elapsed > MAX_BUILD_TIME:
	# print(
	# f"⏰ Build time limit reached ({elapsed:.1f}s), creating empty stores")
	# os.makedirs(persist_dir, exist_ok=True)
	# continue

	# print(f"📚 Building {name} (elapsed: {elapsed:.1f}s)...")

	# # Check if documents exist
	# if os.path.exists(doc_path):
	# doc_files = list(Path(doc_path).rglob("*"))
	# doc_files = [f for f in doc_files if f.is_file(
	# ) and not f.name.startswith('.')]

	# if doc_files:
	# print(
	# f"📄 Found {len(doc_files)} document(s) for {name}")

	# try:
	# build_vector_store(
	# doc_path=doc_path,
	# persist_directory=persist_dir
	# )
	# print(f"✅ {name} built successfully")

	# except Exception as e:
	# print(f"❌ Error building {name}: {str(e)}")
	# os.makedirs(persist_dir, exist_ok=True)
	# else:
	# print(f"⚠️ No documents found in {doc_path}")
	# os.makedirs(persist_dir, exist_ok=True)
	# else:
	# print(f"⚠️ Document path not found: {doc_path}")
	# os.makedirs(persist_dir, exist_ok=True)

	# except ImportError as e:
	# print(f"❌ Could not import vector store utilities: {e}")
	# print("📁 Creating empty vector store directories as fallback...")
	# for name, doc_path, persist_dir in stores_to_build:
	# os.makedirs(persist_dir, exist_ok=True)

	# except Exception as e:
	# print(f"❌ Unexpected error during vector store setup: {e}")
	# print("📁 Creating empty directories as fallback...")
	# for name, doc_path, persist_dir in stores_to_build:
	# os.makedirs(persist_dir, exist_ok=True)
	# else:
	# print("✅ All vector stores already exist!")

	# print("🎉 Vector store setup completed!")


	# def start_server():
	# """Start the FastAPI server"""
	# print("🌐 Starting FastAPI server...")

	# try:
	# # Import your FastAPI app from api/main.py
	# from api.main import app
	# print("✅ Successfully imported FastAPI app from api.main")

	# import uvicorn

	# # Render uses PORT environment variable
	# port = int(os.environ.get("PORT", 7860))
	# host = "0.0.0.0"

	# print(f"🚀 Starting server on {host}:{port}")
	# print(f"🔗 Health check will be available at: /{''}")

	# # Start the server
	# uvicorn.run(
	# app,
	# host=host,
	# port=port,
	# log_level="info",
	# access_log=True
	# )

	# except ImportError as e:
	# print(f"❌ Could not import FastAPI app: {e}")
	# print("📂 Current directory contents:")
	# for item in os.listdir('.'):
	# print(f" - {item}")
	# print("🔍 Looking for api/main.py...")
	# if os.path.exists('api/main.py'):
	# print("✅ api/main.py exists")
	# else:
	# print("❌ api/main.py not found")
	# sys.exit(1)

	# except Exception as e:
	# print(f"❌ Error starting server: {e}")
	# sys.exit(1)


	# if __name__ == "__main__":
	# print("=" * 60)
	# print("🎯 RAG Chatbot - Render Deployment")
	# print("📁 Repository root: rag_app/")
	# print("=" * 60)

	# # Setup phase
	# setup_for_render()

	# # Server start phase
	# start_server()


	# #!/usr/bin/env python3
	# """
	# HF Spaces deployment launcher for RAG Chatbot
	# Repository structure: rag_app/ is the git root
	# """

	# import os
	# import sys
	# import time
	# from pathlib import Path

	# import os

	# # HF Spaces only allows writing to /tmp
	# os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf_cache"
	# os.environ["HF_HOME"] = "/tmp/hf_cache"
	# os.makedirs("/tmp/hf_cache", exist_ok=True)


	# print("🚀 Starting HF Spaces deployment setup...")
	# print(f"📁 Current directory: {os.getcwd()}")
	# print(f"📂 Contents: {os.listdir('.')}")

	# # Ensure current directory is in Python path
	# sys.path.insert(0, os.getcwd())

	# # HF Spaces writable path for ephemeral storage
	# TMP_VECTOR_STORE_ROOT = "/tmp/vector_stores"


	# def setup_for_spaces():
	# """Setup vector stores and environment for HF Spaces"""
	# print("🔧 Setting up vector stores for HF Spaces...")

	# # Ensure docs folders exist in repo
	# required_dirs = [
	# "./docs",
	# "./docs/mes",
	# "./docs/technical",
	# "./docs/general"
	# ]
	# for directory in required_dirs:
	# os.makedirs(directory, exist_ok=True)
	# exists = "✅" if os.path.exists(directory) else "❌"
	# print(f"{exists} Directory: {directory}")

	# # Map of vector stores (persist dirs now point to /tmp)
	# store_configs = [
	# ("MES Manual", "docs/mes", os.path.join(TMP_VECTOR_STORE_ROOT, "mes_db")),
	# ("Technical Docs", "docs/technical",
	# os.path.join(TMP_VECTOR_STORE_ROOT, "tech_db")),
	# ("General Docs", "docs/general",
	# os.path.join(TMP_VECTOR_STORE_ROOT, "general_db")),
	# ]

	# stores_to_build = []
	# for name, doc_path, persist_dir in store_configs:
	# # Check if store already exists in repo or in /tmp
	# if os.path.exists(persist_dir) and os.listdir(persist_dir):
	# print(f"✅ {name} vector store already exists in {persist_dir}")
	# else:
	# stores_to_build.append((name, doc_path, persist_dir))
	# print(f"🔧 {name} vector store needs building in {persist_dir}")

	# # Build missing vector stores
	# if stores_to_build:
	# print(f"🏗️ Building {len(stores_to_build)} vector store(s)...")
	# start_time = time.time()
	# MAX_BUILD_TIME = 600 # seconds

	# try:
	# from utils.vector_store import build_vector_store
	# print("✅ Vector store utilities imported successfully")

	# for name, doc_path, persist_dir in stores_to_build:
	# elapsed = time.time() - start_time
	# if elapsed > MAX_BUILD_TIME:
	# print(
	# f"⏰ Build time limit reached ({elapsed:.1f}s), creating empty store at {persist_dir}")
	# os.makedirs(persist_dir, exist_ok=True)
	# continue

	# if os.path.exists(doc_path):
	# doc_files = [f for f in Path(doc_path).rglob(
	# "*") if f.is_file() and not f.name.startswith('.')]
	# if doc_files:
	# print(
	# f"📄 Found {len(doc_files)} document(s) for {name}")
	# try:
	# build_vector_store(
	# doc_path=doc_path, persist_directory=persist_dir)
	# print(f"✅ {name} built successfully")
	# except Exception as e:
	# print(f"❌ Error building {name}: {str(e)}")
	# os.makedirs(persist_dir, exist_ok=True)
	# else:
	# print(f"⚠️ No documents found in {doc_path}")
	# os.makedirs(persist_dir, exist_ok=True)
	# else:
	# print(f"⚠️ Document path not found: {doc_path}")
	# os.makedirs(persist_dir, exist_ok=True)

	# except ImportError as e:
	# print(f"❌ Could not import vector store utilities: {e}")
	# for _, _, persist_dir in stores_to_build:
	# os.makedirs(persist_dir, exist_ok=True)

	# else:
	# print("✅ All vector stores already exist!")

	# print("🎉 Vector store setup completed!")


	# def start_server():
	# """Start the FastAPI server"""
	# print("🌐 Starting FastAPI server...")

	# try:
	# from api.main import app
	# print("✅ Successfully imported FastAPI app from api.main")

	# import uvicorn

	# port = int(os.environ.get("PORT", 7860))
	# host = "0.0.0.0"

	# print(f"🚀 Starting server on {host}:{port}")
	# uvicorn.run(app, host=host, port=port,
	# log_level="info", access_log=True)

	# except ImportError as e:
	# print(f"❌ Could not import FastAPI app: {e}")
	# sys.exit(1)
	# except Exception as e:
	# print(f"❌ Error starting server: {e}")
	# sys.exit(1)


	# if __name__ == "__main__":
	# print("=" * 60)
	# print("🎯 RAG Chatbot - HF Spaces Deployment")
	# print("=" * 60)

	# # Setup phase
	# setup_for_spaces()

	# # Server start phase
	# start_server()


	# app.py - FastAPI backend with a Gradio UI mounted at /gradio (for the Gradio Space template)
	from fastapi import Request
	import requests
	from dotenv import load_dotenv
	from utils.vector_store import get_vector_store
	from pydantic import BaseModel
	from fastapi import FastAPI, HTTPException, Request
	import os
	import sys
	import gradio as gr

	from utils.helpers.chat_mapper import map_answer_to_chat_response

	from fastapi.middleware.cors import CORSMiddleware

	# Put the parent of this file's directory on the import path.
	# NOTE(review): this runs after the `utils...` imports above, so it cannot
	# influence them; confirm whether it is still needed.
	sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

	# Load variables from a local .env file (python-dotenv) before the
	# environment is read further down (GEMINI_API_KEY).
	load_dotenv()

	# The FastAPI application; routes and middleware below register on it.
	app = FastAPI()

	# Simplified CORS for debugging
	# NOTE(review): every origin, method and header is allowed AND credentials
	# are enabled. FastAPI's CORS docs advise against wildcard origins together
	# with allow_credentials=True; tighten before exposing this beyond debugging.
	app.add_middleware(
	CORSMiddleware,
	allow_origins=["*"],
	allow_credentials=True,
	allow_methods=["*"],
	allow_headers=["*"],
	)

	# Query prefix -> directory of the persisted vector store for that domain.
	# Insertion order is kept because the prefix lookup iterates this mapping.
	VECTOR_STORE_PATHS = {
	    "mes": "./vector_stores/mes_db",
	    "technical": "./vector_stores/tech_db",
	    "general": "./vector_stores/general_db",
	}
	# Queries without a recognised prefix fall back to the general store.
	VECTOR_STORE_PATHS["default"] = VECTOR_STORE_PATHS["general"]


	class QueryRequest(BaseModel):
	    """JSON body accepted by the POST question endpoint."""

	    # The user's question; it may start with a store prefix such as "mes:".
	    query: str


	# Gemini API configuration.
	GEMINI_API_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent"

	# The key is mandatory: abort at import time when it is unset or empty
	# instead of failing on the first request.
	GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
	if GEMINI_API_KEY is None or GEMINI_API_KEY == "":
	    raise ValueError("GEMINI_API_KEY environment variable required")

	# Vector store loader


	def load_vector_store_by_prefix(query: str):
	    """Select the vector store addressed by an optional ``<prefix>:`` in *query*.

	    The text before the first colon is compared case-insensitively against
	    the keys of ``VECTOR_STORE_PATHS`` (``"default"`` is not a usable prefix).

	    Args:
	        query: Raw user question, e.g. ``"mes: How are lots tracked?"``.

	    Returns:
	        A ``(vector_store, cleaned_query, store_key)`` tuple. With a known
	        prefix, ``cleaned_query`` is the text after the colon with surrounding
	        whitespace removed and its original casing kept. Otherwise the default
	        store is returned together with *query* unchanged and ``"default"``.
	    """
	    # Split the original text (not a lowercased copy) so the question that is
	    # later embedded and sent to the model keeps acronyms and proper nouns.
	    head, colon, remainder = query.strip().partition(":")
	    store_key = head.lower()

	    if colon and store_key != "default" and store_key in VECTOR_STORE_PATHS:
	        store = get_vector_store(persist_directory=VECTOR_STORE_PATHS[store_key])
	        return store, remainder.strip(), store_key

	    return get_vector_store(persist_directory=VECTOR_STORE_PATHS["default"]), query, "default"


	def generate_answer_with_gemini(query: str, context_docs: list):
	    """Ask Gemini to answer *query* using the retrieved documents as context.

	    Args:
	        query: The user's question (store prefix already removed).
	        context_docs: Retrieved documents; each must expose ``page_content``.

	    Returns:
	        The generated answer text. Failures are reported in-band rather than
	        raised: ``"API Error: <status> - <body>"`` for a non-200 response,
	        ``"Error: <message>"`` for any exception, and
	        ``"I couldn't generate an answer."`` when the response has no text.
	    """
	    # Build context string
	    knowledge_base = "\n\n".join(
	        f"Data Source {i}: {doc.page_content.strip()}"
	        for i, doc in enumerate(context_docs, 1)
	    )

	    # The updated prompt is more direct and forceful
	    prompt = (
	        "You are an expert AI assistant that uses a provided knowledge base to answer questions. "
	        "Your responses must always be based on this knowledge base, which is the ultimate source of truth. "
	        "You will only use your internal knowledge to supplement the answer, never to contradict it. "
	        "If and only if the knowledge base contains absolutely nothing relevant to the user's question, "
	        "you will respond with a polite and concise statement saying you cannot answer the question from the information you have. "
	        "You must never answer 'I don't know' if there is any information in the knowledge base that is even tangentially related to the question. "
	        "Always try your best to construct a useful answer by synthesizing the provided information. "
	        "Do not refer to the 'knowledge base' or 'sources' directly; instead, use phrases like 'based on the information I have'.\n\n"

	        f"My knowledge base:\n{knowledge_base}\n\n"
	        f"User's Question: {query}\n\nAnswer:"
	    )

	    # print the prompt for debugging
	    print("Prompt sent to Gemini API:", prompt)

	    try:
	        response = requests.post(
	            GEMINI_API_URL,
	            # Send the key as a header, not as ``?key=`` in the URL: request
	            # exceptions embed the URL in their message, and that message is
	            # returned to the caller below, which would disclose the key.
	            headers={"x-goog-api-key": GEMINI_API_KEY},
	            json={
	                "contents": [
	                    {
	                        "role": "user",
	                        "parts": [
	                            {"text": prompt}
	                        ]
	                    }
	                ],
	                "generationConfig": {
	                    "temperature": 0.7,
	                    "maxOutputTokens": 300
	                }
	            },
	            timeout=300
	        )

	        if response.status_code != 200:
	            return f"API Error: {response.status_code} - {response.text}"

	        data = response.json()

	        # Extract answer text. ``or`` fallbacks also cover keys that are
	        # present but empty (e.g. ``"candidates": []``), which would
	        # otherwise raise IndexError instead of giving the fallback message.
	        candidates = data.get("candidates") or [{}]
	        parts = (candidates[0].get("content") or {}).get("parts") or [{}]
	        text = (parts[0].get("text") or "").strip()
	        return text or "I couldn't generate an answer."

	    except Exception as e:
	        # Deliberate best effort: callers display the returned string.
	        return f"Error: {str(e)}"

	# Middleware for logging requests


	@app.middleware("http")
	async def log_requests(request: Request, call_next):
	    """Print method, URL, headers and response status for every HTTP request.

	    Values of credential-bearing headers are replaced with ``<redacted>`` so
	    tokens and cookies do not end up in the process logs.

	    Args:
	        request: The incoming request.
	        call_next: Callable that passes the request on and returns the response.

	    Returns:
	        The response produced by ``call_next``, unmodified.
	    """
	    sensitive_headers = {
	        "authorization",
	        "proxy-authorization",
	        "cookie",
	        "x-api-key",
	        "x-goog-api-key",
	    }
	    safe_headers = {
	        name: "<redacted>" if name.lower() in sensitive_headers else value
	        for name, value in dict(request.headers).items()
	    }

	    print(f"Request: {request.method} {request.url}")
	    print(f"Headers: {safe_headers}")
	    print(f"Origin: {request.headers.get('origin', 'No Origin')}")
	    print(f"User-Agent: {request.headers.get('user-agent', 'No User-Agent')}")

	    response = await call_next(request)
	    print(f"Response Status: {response.status_code}")
	    return response

	# NEW: Gradio interface function


	def gradio_chat_interface(query: str) -> str:
	    """Answer *query* for the Gradio UI and return the result as Markdown.

	    Runs the same pipeline as the POST endpoint: pick the store from the
	    query prefix, retrieve with MMR, drop duplicate chunks, then ask Gemini.

	    Args:
	        query: Text typed by the user, optionally prefixed with a store name.

	    Returns:
	        Markdown with an "Answer" section followed by a "Sources" section, or
	        a short plain message when the query is blank, the store is
	        unavailable or nothing was retrieved. Exceptions are never raised;
	        they are returned as a Markdown error message.
	    """
	    try:
	        if not query.strip():
	            return "Please enter a question."

	        print(f"Gradio query: {query}")

	        vector_store, cleaned_query, store_key = load_vector_store_by_prefix(
	            query)

	        if not vector_store:
	            return "Vector store not ready. Please try again later."

	        retriever = vector_store.as_retriever(
	            search_type="mmr",
	            search_kwargs={
	                "k": 6,
	                "fetch_k": 20,
	                "lambda_mult": 0.5
	            }
	        )

	        # NOTE(review): if this is a LangChain retriever, newer releases
	        # deprecate get_relevant_documents() in favour of invoke(); confirm
	        # against the installed version before switching.
	        docs = retriever.get_relevant_documents(cleaned_query)

	        # Deduplicate on stripped text, keeping first occurrences in order.
	        seen = set()
	        unique_docs = []
	        for doc in docs:
	            snippet = doc.page_content.strip()
	            if snippet not in seen:
	                seen.add(snippet)
	                unique_docs.append(doc)
	        docs = unique_docs[:5]

	        if not docs:
	            return "I couldn't find any relevant information in the knowledge base to answer your question."

	        answer = generate_answer_with_gemini(cleaned_query, docs)

	        # Format response for Gradio with better markdown
	        sections = [f"## Answer\n\n{answer}\n\n", "## Sources\n\n"]
	        for i, doc in enumerate(docs, 1):
	            source_name = doc.metadata.get('source', 'Unknown Source')
	            page = doc.metadata.get('page')
	            # Test for "missing" explicitly: a plain truthiness check would
	            # hide the label for page number 0.
	            page_info = f" (Page {page})" if page not in (None, "") else ""

	            content = doc.page_content
	            preview = content[:400] + "..." if len(content) > 400 else content
	            sections.append(
	                f"### {i}. {source_name}{page_info}\n\n{preview}\n\n---\n\n")

	        return "".join(sections)

	    except Exception as e:
	        error_msg = f"Error occurred:\n\n```\n{str(e)}\n```"
	        print(f"Gradio error: {e}")
	        return error_msg

	# Create Gradio interface


	def create_gradio_interface():
	    """Build the Gradio Blocks UI for the chatbot.

	    The layout is an intro, a question box with "Ask Question" and "Clear"
	    buttons, a Markdown output area, example questions and a tips footer.
	    Both the button and the Enter key call ``gradio_chat_interface``.

	    Returns:
	        The configured ``gr.Blocks`` instance (not launched).
	    """
	    # ``description`` is a ``gr.Interface`` argument, not a ``gr.Blocks`` one,
	    # so it is not passed here; the same text is shown in the header below.
	    with gr.Blocks(
	        title="RAG Chatbot",
	        theme='soft',
	    ) as interface:

	        gr.Markdown("""
	        # RAG Chatbot

	        Ask questions about your knowledge base and get detailed answers with sources.

	        Available Knowledge:
	        - MES Manual documentation (prefix with "mes:")
	        - Technical documentation (prefix with "technical:")
	        - General documentation (prefix with "general:" or no prefix)
	        """)

	        with gr.Row():
	            with gr.Column(scale=4):
	                query_input = gr.Textbox(
	                    label="Your Question",
	                    placeholder="Enter your question here... (e.g., 'What is machine learning?' or 'mes: How does the system work?')",
	                    lines=3,
	                    max_lines=10
	                )
	            with gr.Column(scale=1):
	                submit_btn = gr.Button(
	                    "Ask Question", variant="primary", size="lg")
	                clear_btn = gr.Button("Clear", variant="secondary")

	        answer_output = gr.Markdown(
	            label="Answer & Sources",
	            value="Welcome! Ask a question above to get started."
	        )

	        # Event handlers
	        submit_btn.click(
	            gradio_chat_interface,
	            inputs=[query_input],
	            outputs=[answer_output]
	        )

	        query_input.submit(  # Allow Enter key to submit
	            gradio_chat_interface,
	            inputs=[query_input],
	            outputs=[answer_output]
	        )

	        # Reset the question box and restore the welcome text.
	        clear_btn.click(
	            lambda: ("", "Welcome! Ask a question above to get started."),
	            outputs=[query_input, answer_output]
	        )

	        # Example questions
	        gr.Examples(
	            examples=[
	                ["What is machine learning and how does it work?"],
	                ["mes: How does the MES system handle production data?"],
	                ["technical: Explain the database architecture"],
	                ["What are the main components of the system?"],
	                ["How do I configure the application settings?"]
	            ],
	            inputs=[query_input],
	            label="Example Questions"
	        )

	        gr.Markdown("""
	        ---

	        Tips:
	        - Use prefixes (mes:, technical:, general:) to search specific knowledge bases
	        - Be specific with your questions for better results
	        - Sources are provided with each answer for verification

	        Technical Info:
	        - Powered by FastAPI backend
	        - Vector search with MMR retrieval
	        - Gemini 2.0 Flash for answer generation
	        """)

	    return interface

	# API Endpoints


	@app.get("/")
	def root():
	    """Return a static status document, including the Gradio mount path."""
	    status_payload = {
	        "status": "running",
	        "model": "gemini-2.0-flash",
	        "using_direct_api": True,
	        "client_ready": True,
	        "gradio_interface": "/gradio",
	    }
	    return status_payload


	@app.post("/")
	async def ask_question(request: Request):
	    """Answer a question posted as JSON ``{"query": "..."}``.

	    Picks the vector store from the query prefix, retrieves with MMR, drops
	    duplicate chunks, asks Gemini, and returns the mapped chat response.

	    Args:
	        request: Incoming request; its raw body is validated as ``QueryRequest``.

	    Returns:
	        The result of ``map_answer_to_chat_response`` for a normal answer, or
	        a plain answer dict with empty ``sources`` when nothing was retrieved.

	    Raises:
	        HTTPException: 422 when the body is not a valid ``QueryRequest``;
	            500 when the vector store is unavailable or anything else fails.
	    """
	    # Local import keeps this handler self-contained; pydantic is already a
	    # dependency of this module (BaseModel).
	    from pydantic import ValidationError

	    try:
	        # Print raw incoming request body. Undecodable bytes are replaced so
	        # that debug logging can never be the reason a request fails.
	        raw_body = await request.body()
	        print("Incoming POST request body:")
	        print(raw_body.decode("utf-8", errors="replace"))

	        # Parse into the Pydantic model; a bad body is the client's error.
	        try:
	            parsed_request = QueryRequest.model_validate_json(raw_body)
	        except ValidationError as exc:
	            raise HTTPException(
	                status_code=422, detail=f"Invalid request body: {exc}"
	            ) from exc
	        print("Parsed request object:", parsed_request)

	        vector_store, cleaned_query, store_key = load_vector_store_by_prefix(
	            parsed_request.query
	        )

	        if not vector_store:
	            raise HTTPException(
	                status_code=500, detail="Vector store not ready"
	            )

	        retriever = vector_store.as_retriever(
	            search_type="mmr",
	            search_kwargs={
	                "k": 6,
	                "fetch_k": 20,
	                "lambda_mult": 0.5
	            }
	        )

	        docs = retriever.get_relevant_documents(cleaned_query)

	        # Deduplicate on stripped text, keeping first occurrences in order.
	        seen = set()
	        unique_docs = []
	        for doc in docs:
	            snippet = doc.page_content.strip()
	            if snippet not in seen:
	                seen.add(snippet)
	                unique_docs.append(doc)
	        docs = unique_docs[:5]

	        if not docs:
	            # NOTE(review): unlike the normal path, this dict is returned
	            # without map_answer_to_chat_response; confirm clients expect that.
	            return {
	                "answer": "I couldn't find any relevant information in the knowledge base to answer your question.",
	                "model_used": "gemini-2.0-flash",
	                "vector_store_used": VECTOR_STORE_PATHS[store_key],
	                "sources": []
	            }

	        answer = generate_answer_with_gemini(cleaned_query, docs)

	        answer_obj = {
	            "answer": answer,
	            "model_used": "gemini-2.0-flash",
	            "vector_store_used": VECTOR_STORE_PATHS[store_key],
	            "sources": [
	                {
	                    "content": doc.page_content[:500] + "...\n",
	                    "metadata": doc.metadata
	                }
	                for doc in docs
	            ]
	        }

	        return map_answer_to_chat_response(answer_obj)

	    except HTTPException:
	        # Keep the status and detail chosen above instead of re-wrapping
	        # them in a generic 500.
	        raise
	    except Exception as e:
	        print(f"Error in ask_question: {e}")
	        raise HTTPException(status_code=500, detail=f"Error: {str(e)}") from e

	# Create the Gradio interface (built once, at import time)
	demo = create_gradio_interface()

	# Mount Gradio on FastAPI at /gradio
	# `app` is rebound to the value returned by mount_gradio_app, so the route
	# decorators further down register on that returned object.
	app = gr.mount_gradio_app(app, demo, path="/gradio")

	# Add a redirect for convenience


	@app.get("/ui")
	async def redirect_to_gradio():
	    """Send requests for /ui on to the Gradio app at /gradio."""
	    from fastapi.responses import RedirectResponse

	    gradio_path = "/gradio"
	    return RedirectResponse(url=gradio_path)

	# Health check endpoint


	@app.get("/health")
	def health_check():
	    """Report a fixed "healthy" status together with the Gradio mount flag."""
	    return dict(status="healthy", gradio_mounted=True)


	# Script entry point: serve the combined FastAPI + Gradio app with uvicorn.
	if __name__ == "__main__":
	    import uvicorn
	    # Listen on $PORT when set, otherwise 8000.
	    # NOTE(review): the commented-out launchers earlier in this file default
	    # to 7860; confirm which port the hosting platform expects.
	    port = int(os.environ.get("PORT", 8000))
	    # Bind on all interfaces so the server is reachable from outside the host.
	    uvicorn.run(app, host="0.0.0.0", port=port)