# Page header captured together with this file (not part of the program):
# christian
# testing gradio for interface
# 62944f8
# raw
# history blame
# 24.5 kB
# #!/usr/bin/env python3
# """
# Render.com deployment launcher for RAG Chatbot
# Repository structure: rag_app/ is the git root
# """
# import os
# import sys
# import time
# from pathlib import Path
# print("🚀 Starting Render deployment setup...")
# print(f"📁 Current directory: {os.getcwd()}")
# print(f"📂 Contents: {os.listdir('.')}")
# # Since we're already in rag_app/, no need to change directories
# # Just ensure current directory is in Python path
# sys.path.insert(0, os.getcwd())
# def setup_for_render():
# """Setup vector stores and environment for Render deployment"""
# print("🔧 Setting up vector stores for Render...")
# # Ensure directories exist (relative to rag_app/)
# required_dirs = [
# "./vector_stores",
# "./docs",
# "./docs/mes",
# "./docs/technical",
# "./docs/general"
# ]
# for directory in required_dirs:
# os.makedirs(directory, exist_ok=True)
# exists = "✅" if os.path.exists(directory) else "❌"
# print(f"{exists} Directory: {directory}")
# # Check existing vector stores
# store_configs = [
# ("MES Manual", "docs/mes", "./vector_stores/mes_db"),
# ("Technical Docs", "docs/technical", "./vector_stores/tech_db"),
# ("General Docs", "docs/general", "./vector_stores/general_db")
# ]
# stores_to_build = []
# for name, doc_path, persist_dir in store_configs:
# if os.path.exists(persist_dir) and os.listdir(persist_dir):
# print(f"✅ {name} vector store already exists")
# else:
# stores_to_build.append((name, doc_path, persist_dir))
# print(f"🔧 {name} vector store needs building")
# # Build missing vector stores
# if stores_to_build:
# print(f"🏗️ Building {len(stores_to_build)} vector store(s)...")
# # Add timeout to prevent Render build timeout
# start_time = time.time()
# MAX_BUILD_TIME = 600 # 10 minutes max for free tier
# try:
# # Import your vector store utilities
# from utils.vector_store import build_vector_store
# print("✅ Vector store utilities imported successfully")
# for name, doc_path, persist_dir in stores_to_build:
# # Check build time limit
# elapsed = time.time() - start_time
# if elapsed > MAX_BUILD_TIME:
# print(
# f"⏰ Build time limit reached ({elapsed:.1f}s), creating empty stores")
# os.makedirs(persist_dir, exist_ok=True)
# continue
# print(f"📚 Building {name} (elapsed: {elapsed:.1f}s)...")
# # Check if documents exist
# if os.path.exists(doc_path):
# doc_files = list(Path(doc_path).rglob("*"))
# doc_files = [f for f in doc_files if f.is_file(
# ) and not f.name.startswith('.')]
# if doc_files:
# print(
# f"📄 Found {len(doc_files)} document(s) for {name}")
# try:
# build_vector_store(
# doc_path=doc_path,
# persist_directory=persist_dir
# )
# print(f"✅ {name} built successfully")
# except Exception as e:
# print(f"❌ Error building {name}: {str(e)}")
# os.makedirs(persist_dir, exist_ok=True)
# else:
# print(f"⚠️ No documents found in {doc_path}")
# os.makedirs(persist_dir, exist_ok=True)
# else:
# print(f"⚠️ Document path not found: {doc_path}")
# os.makedirs(persist_dir, exist_ok=True)
# except ImportError as e:
# print(f"❌ Could not import vector store utilities: {e}")
# print("📁 Creating empty vector store directories as fallback...")
# for name, doc_path, persist_dir in stores_to_build:
# os.makedirs(persist_dir, exist_ok=True)
# except Exception as e:
# print(f"❌ Unexpected error during vector store setup: {e}")
# print("📁 Creating empty directories as fallback...")
# for name, doc_path, persist_dir in stores_to_build:
# os.makedirs(persist_dir, exist_ok=True)
# else:
# print("✅ All vector stores already exist!")
# print("🎉 Vector store setup completed!")
# def start_server():
# """Start the FastAPI server"""
# print("🌐 Starting FastAPI server...")
# try:
# # Import your FastAPI app from api/main.py
# from api.main import app
# print("✅ Successfully imported FastAPI app from api.main")
# import uvicorn
# # Render uses PORT environment variable
# port = int(os.environ.get("PORT", 7860))
# host = "0.0.0.0"
# print(f"🚀 Starting server on {host}:{port}")
# print(f"🔗 Health check will be available at: /{''}")
# # Start the server
# uvicorn.run(
# app,
# host=host,
# port=port,
# log_level="info",
# access_log=True
# )
# except ImportError as e:
# print(f"❌ Could not import FastAPI app: {e}")
# print("📂 Current directory contents:")
# for item in os.listdir('.'):
# print(f" - {item}")
# print("🔍 Looking for api/main.py...")
# if os.path.exists('api/main.py'):
# print("✅ api/main.py exists")
# else:
# print("❌ api/main.py not found")
# sys.exit(1)
# except Exception as e:
# print(f"❌ Error starting server: {e}")
# sys.exit(1)
# if __name__ == "__main__":
# print("=" * 60)
# print("🎯 RAG Chatbot - Render Deployment")
# print("📁 Repository root: rag_app/")
# print("=" * 60)
# # Setup phase
# setup_for_render()
# # Server start phase
# start_server()
# #!/usr/bin/env python3
# """
# HF Spaces deployment launcher for RAG Chatbot
# Repository structure: rag_app/ is the git root
# """
# import os
# import sys
# import time
# from pathlib import Path
# import os
# # HF Spaces only allows writing to /tmp
# os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf_cache"
# os.environ["HF_HOME"] = "/tmp/hf_cache"
# os.makedirs("/tmp/hf_cache", exist_ok=True)
# print("🚀 Starting HF Spaces deployment setup...")
# print(f"📁 Current directory: {os.getcwd()}")
# print(f"📂 Contents: {os.listdir('.')}")
# # Ensure current directory is in Python path
# sys.path.insert(0, os.getcwd())
# # HF Spaces writable path for ephemeral storage
# TMP_VECTOR_STORE_ROOT = "/tmp/vector_stores"
# def setup_for_spaces():
# """Setup vector stores and environment for HF Spaces"""
# print("🔧 Setting up vector stores for HF Spaces...")
# # Ensure docs folders exist in repo
# required_dirs = [
# "./docs",
# "./docs/mes",
# "./docs/technical",
# "./docs/general"
# ]
# for directory in required_dirs:
# os.makedirs(directory, exist_ok=True)
# exists = "✅" if os.path.exists(directory) else "❌"
# print(f"{exists} Directory: {directory}")
# # Map of vector stores (persist dirs now point to /tmp)
# store_configs = [
# ("MES Manual", "docs/mes", os.path.join(TMP_VECTOR_STORE_ROOT, "mes_db")),
# ("Technical Docs", "docs/technical",
# os.path.join(TMP_VECTOR_STORE_ROOT, "tech_db")),
# ("General Docs", "docs/general",
# os.path.join(TMP_VECTOR_STORE_ROOT, "general_db")),
# ]
# stores_to_build = []
# for name, doc_path, persist_dir in store_configs:
# # Check if store already exists in repo or in /tmp
# if os.path.exists(persist_dir) and os.listdir(persist_dir):
# print(f"✅ {name} vector store already exists in {persist_dir}")
# else:
# stores_to_build.append((name, doc_path, persist_dir))
# print(f"🔧 {name} vector store needs building in {persist_dir}")
# # Build missing vector stores
# if stores_to_build:
# print(f"🏗️ Building {len(stores_to_build)} vector store(s)...")
# start_time = time.time()
# MAX_BUILD_TIME = 600 # seconds
# try:
# from utils.vector_store import build_vector_store
# print("✅ Vector store utilities imported successfully")
# for name, doc_path, persist_dir in stores_to_build:
# elapsed = time.time() - start_time
# if elapsed > MAX_BUILD_TIME:
# print(
# f"⏰ Build time limit reached ({elapsed:.1f}s), creating empty store at {persist_dir}")
# os.makedirs(persist_dir, exist_ok=True)
# continue
# if os.path.exists(doc_path):
# doc_files = [f for f in Path(doc_path).rglob(
# "*") if f.is_file() and not f.name.startswith('.')]
# if doc_files:
# print(
# f"📄 Found {len(doc_files)} document(s) for {name}")
# try:
# build_vector_store(
# doc_path=doc_path, persist_directory=persist_dir)
# print(f"✅ {name} built successfully")
# except Exception as e:
# print(f"❌ Error building {name}: {str(e)}")
# os.makedirs(persist_dir, exist_ok=True)
# else:
# print(f"⚠️ No documents found in {doc_path}")
# os.makedirs(persist_dir, exist_ok=True)
# else:
# print(f"⚠️ Document path not found: {doc_path}")
# os.makedirs(persist_dir, exist_ok=True)
# except ImportError as e:
# print(f"❌ Could not import vector store utilities: {e}")
# for _, _, persist_dir in stores_to_build:
# os.makedirs(persist_dir, exist_ok=True)
# else:
# print("✅ All vector stores already exist!")
# print("🎉 Vector store setup completed!")
# def start_server():
# """Start the FastAPI server"""
# print("🌐 Starting FastAPI server...")
# try:
# from api.main import app
# print("✅ Successfully imported FastAPI app from api.main")
# import uvicorn
# port = int(os.environ.get("PORT", 7860))
# host = "0.0.0.0"
# print(f"🚀 Starting server on {host}:{port}")
# uvicorn.run(app, host=host, port=port,
# log_level="info", access_log=True)
# except ImportError as e:
# print(f"❌ Could not import FastAPI app: {e}")
# sys.exit(1)
# except Exception as e:
# print(f"❌ Error starting server: {e}")
# sys.exit(1)
# if __name__ == "__main__":
# print("=" * 60)
# print("🎯 RAG Chatbot - HF Spaces Deployment")
# print("=" * 60)
# # Setup phase
# setup_for_spaces()
# # Server start phase
# start_server()
# app.py - FastAPI application with a Gradio UI mounted at /gradio (for the Gradio template)
from fastapi import Request
import requests
from dotenv import load_dotenv
from utils.vector_store import get_vector_store
from pydantic import BaseModel
from fastapi import FastAPI, HTTPException, Request
import os
import sys
import gradio as gr
from utils.helpers.chat_mapper import map_answer_to_chat_response
from fastapi.middleware.cors import CORSMiddleware
# Make the parent of this file's directory importable.
# NOTE(review): this runs after the `utils` imports above have already been
# executed, so it cannot influence how those modules are resolved.
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
# Load values from a .env file (if present) before the environment is read
# for GEMINI_API_KEY further down.
load_dotenv()
app = FastAPI()
# Simplified CORS for debugging
# NOTE(review): wildcard origins combined with allow_credentials=True is very
# permissive — confirm it is tightened before the service is exposed publicly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Vector store mapping for different domains
# Every key except "default" doubles as the "<key>:" query prefix matched by
# load_vector_store_by_prefix; "default" is used when no prefix matches.
VECTOR_STORE_PATHS = {
    "mes": "./vector_stores/mes_db",
    "technical": "./vector_stores/tech_db",
    "general": "./vector_stores/general_db",
    "default": "./vector_stores/general_db",
}
class QueryRequest(BaseModel):
    """Request body parsed by the POST "/" endpoint."""

    # The user's question; it is passed to load_vector_store_by_prefix, which
    # recognises an optional "<domain>:" prefix such as "mes:".
    query: str
# Gemini API setup
# The key is read once at import time; importing this module raises
# ValueError when the variable is missing or empty.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
if not GEMINI_API_KEY:
    raise ValueError("GEMINI_API_KEY environment variable required")
# REST endpoint used by generate_answer_with_gemini (gemini-2.0-flash,
# generateContent method).
GEMINI_API_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent"
# Vector store loader
def load_vector_store_by_prefix(query: str, *, store_paths=None, loader=None):
    """Pick the vector store named by an optional ``"<domain>:"`` query prefix.

    The prefix is matched case-insensitively against the keys of the path
    mapping (the ``"default"`` key is never treated as a prefix). When a
    prefix matches, it is removed and the remainder of the question is
    returned with its original casing, so acronyms and proper nouns reach
    the retriever and the language model unchanged.

    Args:
        query: The raw user question.
        store_paths: Optional mapping of domain key -> persist directory.
            Defaults to the module-level ``VECTOR_STORE_PATHS``.
        loader: Optional callable invoked as ``loader(persist_directory=...)``.
            Defaults to ``get_vector_store``.

    Returns:
        A ``(vector_store, cleaned_query, store_key)`` tuple. Without a
        matching prefix the query is returned untouched and ``store_key``
        is ``"default"``.
    """
    paths = VECTOR_STORE_PATHS if store_paths is None else store_paths
    load = get_vector_store if loader is None else loader

    stripped = query.strip()
    for prefix, path in paths.items():
        if prefix == "default":
            continue
        marker = f"{prefix}:"
        # Lower-case only the leading slice used for matching; the remainder
        # is cut from the original text so its casing is preserved.
        if stripped[:len(marker)].lower() == marker:
            cleaned_query = stripped[len(marker):].strip()
            return load(persist_directory=path), cleaned_query, prefix
    return load(persist_directory=paths["default"]), query, "default"
def generate_answer_with_gemini(query: str, context_docs: list):
    """Ask Gemini to answer ``query`` using the retrieved documents as context.

    Args:
        query: The user's question (any domain prefix already removed).
        context_docs: Retrieved documents; each item must expose a
            ``page_content`` string attribute.

    Returns:
        The generated answer text. Failures are reported in-band rather than
        raised: a non-200 reply yields ``"API Error: ..."``, any exception
        yields ``"Error: ..."``, and an empty generation yields a fixed
        fallback sentence.
    """
    # Build context string
    knowledge_base = "\n\n".join(
        f"Data Source {i}: {doc.page_content.strip()}"
        for i, doc in enumerate(context_docs, 1)
    )
    # The updated prompt is more direct and forceful
    prompt = (
        "You are an expert AI assistant that uses a provided knowledge base to answer questions. "
        "Your responses must always be based on this knowledge base, which is the ultimate source of truth. "
        "You will only use your internal knowledge to supplement the answer, never to contradict it. "
        "If and only if the knowledge base contains absolutely nothing relevant to the user's question, "
        "you will respond with a polite and concise statement saying you cannot answer the question from the information you have. "
        "You must never answer 'I don't know' if there is any information in the knowledge base that is even tangentially related to the question. "
        "Always try your best to construct a useful answer by synthesizing the provided information. "
        "Do not refer to the 'knowledge base' or 'sources' directly; instead, use phrases like 'based on the information I have'.\n\n"
        f"My knowledge base:\n{knowledge_base}\n\n"
        f"User's Question: {query}\n\nAnswer:"
    )
    # print the prompt for debugging
    print("Prompt sent to Gemini API:", prompt)
    try:
        response = requests.post(
            GEMINI_API_URL,
            # Send the key as a header instead of a URL parameter: request
            # exceptions embed the URL in their message, and that message is
            # returned to the caller below.
            headers={"x-goog-api-key": GEMINI_API_KEY},
            json={
                "contents": [
                    {
                        "role": "user",
                        "parts": [
                            {"text": prompt}
                        ]
                    }
                ],
                "generationConfig": {
                    "temperature": 0.7,
                    "maxOutputTokens": 300
                }
            },
            timeout=300,
        )
        if response.status_code != 200:
            return f"API Error: {response.status_code} - {response.text}"
        data = response.json()
        # Extract answer text. `or [{}]` also covers keys that are present
        # but hold an empty list, which would otherwise raise IndexError.
        candidates = data.get("candidates") or [{}]
        parts = candidates[0].get("content", {}).get("parts") or [{}]
        text = parts[0].get("text", "").strip()
        return text or "I couldn't generate an answer."
    except Exception as e:
        return f"Error: {str(e)}"
# Middleware for logging requests
@app.middleware("http")
async def log_requests(request: Request, call_next):
    """Print a summary of every request and the status of its response.

    Credential-bearing headers are masked before printing so that tokens and
    cookies do not end up in the process logs.

    Args:
        request: The incoming request.
        call_next: Callable that forwards the request down the stack and
            returns the response.

    Returns:
        The response produced by ``call_next``, unmodified.
    """
    sensitive = {
        "authorization",
        "proxy-authorization",
        "cookie",
        "set-cookie",
        "x-api-key",
        "x-goog-api-key",
    }
    safe_headers = {
        name: ("<redacted>" if name.lower() in sensitive else value)
        for name, value in dict(request.headers).items()
    }
    print(f"Request: {request.method} {request.url}")
    print(f"Headers: {safe_headers}")
    print(f"Origin: {request.headers.get('origin', 'No Origin')}")
    print(f"User-Agent: {request.headers.get('user-agent', 'No User-Agent')}")
    response = await call_next(request)
    print(f"Response Status: {response.status_code}")
    return response
# NEW: Gradio interface function
def gradio_chat_interface(query: str) -> str:
    """Answer ``query`` and return the result as Markdown for the Gradio UI.

    The function selects a vector store from the optional query prefix,
    retrieves documents with MMR search, drops duplicates, asks Gemini for an
    answer, and appends a "Sources" section with a preview of each document.

    Args:
        query: The text typed by the user; may be empty or ``None``.

    Returns:
        Markdown text. Problems are reported in-band (a prompt to enter a
        question, a "not ready" notice, a "nothing found" notice, or a
        fenced error block); the function does not raise.
    """
    try:
        # Treat None the same as blank input instead of failing on .strip().
        if not query or not query.strip():
            return "Please enter a question."
        print(f"Gradio query: {query}")
        # Use your existing logic
        vector_store, cleaned_query, _store_key = load_vector_store_by_prefix(
            query)
        if not vector_store:
            return "Vector store not ready. Please try again later."
        retriever = vector_store.as_retriever(
            search_type="mmr",
            search_kwargs={
                "k": 6,
                "fetch_k": 20,
                "lambda_mult": 0.5
            }
        )
        docs = retriever.get_relevant_documents(cleaned_query)
        # Deduplicate on stripped content, keeping first occurrences in order.
        seen = set()
        unique_docs = []
        for doc in docs:
            snippet = doc.page_content.strip()
            if snippet not in seen:
                seen.add(snippet)
                unique_docs.append(doc)
        docs = unique_docs[:5]
        if not docs:
            return "I couldn't find any relevant information in the knowledge base to answer your question."
        answer = generate_answer_with_gemini(cleaned_query, docs)
        # Format response for Gradio with better markdown. `docs` is known to
        # be non-empty here, so the Sources section is always emitted.
        sections = [f"## Answer\n\n{answer}\n\n", "## Sources\n\n"]
        for i, doc in enumerate(docs, 1):
            source_name = doc.metadata.get('source', 'Unknown Source')
            page = doc.metadata.get('page')
            # Only a missing/empty page is omitted; a page value of 0 is a
            # real page number and must still be shown.
            page_info = "" if page is None or page == "" else f" (Page {page})"
            content = doc.page_content
            preview = content[:400] + "..." if len(content) > 400 else content
            sections.append(
                f"### {i}. {source_name}{page_info}\n\n{preview}\n\n---\n\n")
        return "".join(sections)
    except Exception as e:
        error_msg = f"**Error occurred:**\n\n```\n{str(e)}\n```"
        print(f"Gradio error: {e}")
        return error_msg
# Create Gradio interface
def create_gradio_interface():
    """Build the Gradio Blocks UI for the chatbot and return it.

    The layout has a question box with Ask/Clear buttons, a Markdown output
    area, a set of example questions, and a footer with usage tips. Both the
    Ask button and pressing Enter in the question box call
    ``gradio_chat_interface``.

    Returns:
        The constructed ``gr.Blocks`` object (not launched).
    """
    # `gr.Blocks` has no `description` parameter; the introductory text is
    # rendered by the first Markdown block instead.
    with gr.Blocks(
        title="RAG Chatbot",
        theme='soft',
    ) as interface:
        # Markdown bodies are kept flush-left so the rendered text matches
        # the existing strings exactly.
        gr.Markdown("""
# RAG Chatbot
Ask questions about your knowledge base and get detailed answers with sources.
**Available Knowledge:**
- MES Manual documentation (prefix with "mes:")
- Technical documentation (prefix with "technical:")
- General documentation (prefix with "general:" or no prefix)
""")
        with gr.Row():
            with gr.Column(scale=4):
                query_input = gr.Textbox(
                    label="Your Question",
                    placeholder="Enter your question here... (e.g., 'What is machine learning?' or 'mes: How does the system work?')",
                    lines=3,
                    max_lines=10
                )
            with gr.Column(scale=1):
                submit_btn = gr.Button(
                    "Ask Question", variant="primary", size="lg")
                clear_btn = gr.Button("Clear", variant="secondary")
        answer_output = gr.Markdown(
            label="Answer & Sources",
            value="Welcome! Ask a question above to get started."
        )
        # Event handlers
        submit_btn.click(
            gradio_chat_interface,
            inputs=[query_input],
            outputs=[answer_output]
        )
        query_input.submit(  # Allow Enter key to submit
            gradio_chat_interface,
            inputs=[query_input],
            outputs=[answer_output]
        )
        # Clear resets the question box and restores the welcome text.
        clear_btn.click(
            lambda: ("", "Welcome! Ask a question above to get started."),
            outputs=[query_input, answer_output]
        )
        # Example questions
        gr.Examples(
            examples=[
                ["What is machine learning and how does it work?"],
                ["mes: How does the MES system handle production data?"],
                ["technical: Explain the database architecture"],
                ["What are the main components of the system?"],
                ["How do I configure the application settings?"]
            ],
            inputs=[query_input],
            label="Example Questions"
        )
        gr.Markdown("""
---
**Tips:**
- Use prefixes (mes:, technical:, general:) to search specific knowledge bases
- Be specific with your questions for better results
- Sources are provided with each answer for verification
**Technical Info:**
- Powered by FastAPI backend
- Vector search with MMR retrieval
- Gemini 2.0 Flash for answer generation
""")
    return interface
# API Endpoints
@app.get("/")
def root():
    """Return a fixed status payload that also names the Gradio mount path."""
    info = dict(
        status="running",
        model="gemini-2.0-flash",
        using_direct_api=True,
        client_ready=True,
        gradio_interface="/gradio",
    )
    return info
@app.post("/")
async def ask_question(request: Request):
    """Answer the question contained in a JSON request body.

    The body is read raw (and printed for debugging), validated as a
    ``QueryRequest``, routed to a vector store by its optional prefix,
    answered with Gemini, and passed through ``map_answer_to_chat_response``.

    Args:
        request: The incoming request; its body must be JSON with a
            ``query`` string field.

    Returns:
        The mapped chat response. When retrieval finds nothing, a plain dict
        with an explanatory answer and an empty ``sources`` list is returned.
        NOTE(review): that empty-result dict is not passed through
        ``map_answer_to_chat_response`` — confirm clients expect both shapes.

    Raises:
        HTTPException: 422 when the body fails validation, 500 when the
            vector store is unavailable or any other error occurs.
    """
    # Imported here so this handler carries its own dependency.
    from pydantic import ValidationError

    # NOTE(review): retrieval and the Gemini call below are synchronous and
    # run inside this coroutine — confirm blocking here is acceptable.
    try:
        # Print raw incoming request body
        raw_body = await request.body()
        print("Incoming POST request body:")
        # Debug output only: never fail the request on undecodable bytes.
        print(raw_body.decode("utf-8", errors="replace"))
        # Parse into your Pydantic model; a malformed body is the client's
        # error, so report it as 422 rather than a server failure.
        try:
            parsed_request = QueryRequest.model_validate_json(raw_body)
        except ValidationError as exc:
            raise HTTPException(
                status_code=422, detail=f"Invalid request body: {exc}"
            ) from exc
        print("Parsed request object:", parsed_request)
        vector_store, cleaned_query, store_key = load_vector_store_by_prefix(
            parsed_request.query
        )
        if not vector_store:
            raise HTTPException(
                status_code=500, detail="Vector store not ready"
            )
        retriever = vector_store.as_retriever(
            search_type="mmr",
            search_kwargs={
                "k": 6,
                "fetch_k": 20,
                "lambda_mult": 0.5
            }
        )
        docs = retriever.get_relevant_documents(cleaned_query)
        # Deduplicate on stripped content, keeping first occurrences in order.
        seen = set()
        unique_docs = []
        for doc in docs:
            snippet = doc.page_content.strip()
            if snippet not in seen:
                seen.add(snippet)
                unique_docs.append(doc)
        docs = unique_docs[:5]
        if not docs:
            return {
                "answer": "I couldn't find any relevant information in the knowledge base to answer your question.",
                "model_used": "gemini-2.0-flash",
                "vector_store_used": VECTOR_STORE_PATHS[store_key],
                "sources": []
            }
        answer = generate_answer_with_gemini(cleaned_query, docs)
        answer_obj = {
            "answer": answer,
            "model_used": "gemini-2.0-flash",
            "vector_store_used": VECTOR_STORE_PATHS[store_key],
            "sources": [
                {
                    "content": doc.page_content[:500] + "...\n",
                    "metadata": doc.metadata
                }
                for doc in docs
            ]
        }
        return map_answer_to_chat_response(answer_obj)
    except HTTPException:
        # Deliberate HTTP errors raised above keep their status and detail
        # instead of being re-wrapped by the generic handler below.
        raise
    except Exception as e:
        print(f"Error in ask_question: {e}")
        raise HTTPException(status_code=500, detail=f"Error: {str(e)}") from e
# Create the Gradio interface
# Built once at import time so it can be mounted just below.
demo = create_gradio_interface()
# Mount Gradio on FastAPI at /gradio
# `app` is rebound to the value returned by gr.mount_gradio_app, so the route
# decorators that follow attach to that returned application.
app = gr.mount_gradio_app(app, demo, path="/gradio")
# Add a redirect for convenience
@app.get("/ui")
async def redirect_to_gradio():
    """Send clients that request /ui on to the Gradio app at /gradio."""
    from fastapi.responses import RedirectResponse

    target = "/gradio"
    return RedirectResponse(url=target)
# Health check endpoint
@app.get("/health")
def health_check():
    """Return a fixed payload indicating the service is up."""
    payload = dict(status="healthy", gradio_mounted=True)
    return payload
# Serve the app with uvicorn when this file is executed as a script.
if __name__ == "__main__":
    import uvicorn
    # Use the PORT environment variable when set, otherwise fall back to 8000.
    # NOTE(review): the commented-out launchers earlier in this file default
    # to 7860 — confirm which default the hosting platform expects.
    port = int(os.environ.get("PORT", 8000))
    # Listen on all interfaces.
    uvicorn.run(app, host="0.0.0.0", port=port)