# #!/usr/bin/env python3 | |
# """ | |
# Render.com deployment launcher for RAG Chatbot | |
# Repository structure: rag_app/ is the git root | |
# """ | |
# import os | |
# import sys | |
# import time | |
# from pathlib import Path | |
# print("🚀 Starting Render deployment setup...") | |
# print(f"📁 Current directory: {os.getcwd()}") | |
# print(f"📂 Contents: {os.listdir('.')}") | |
# # Since we're already in rag_app/, no need to change directories | |
# # Just ensure current directory is in Python path | |
# sys.path.insert(0, os.getcwd()) | |
# def setup_for_render(): | |
# """Setup vector stores and environment for Render deployment""" | |
# print("🔧 Setting up vector stores for Render...") | |
# # Ensure directories exist (relative to rag_app/) | |
# required_dirs = [ | |
# "./vector_stores", | |
# "./docs", | |
# "./docs/mes", | |
# "./docs/technical", | |
# "./docs/general" | |
# ] | |
# for directory in required_dirs: | |
# os.makedirs(directory, exist_ok=True) | |
# exists = "✅" if os.path.exists(directory) else "❌" | |
# print(f"{exists} Directory: {directory}") | |
# # Check existing vector stores | |
# store_configs = [ | |
# ("MES Manual", "docs/mes", "./vector_stores/mes_db"), | |
# ("Technical Docs", "docs/technical", "./vector_stores/tech_db"), | |
# ("General Docs", "docs/general", "./vector_stores/general_db") | |
# ] | |
# stores_to_build = [] | |
# for name, doc_path, persist_dir in store_configs: | |
# if os.path.exists(persist_dir) and os.listdir(persist_dir): | |
# print(f"✅ {name} vector store already exists") | |
# else: | |
# stores_to_build.append((name, doc_path, persist_dir)) | |
# print(f"🔧 {name} vector store needs building") | |
# # Build missing vector stores | |
# if stores_to_build: | |
# print(f"🏗️ Building {len(stores_to_build)} vector store(s)...") | |
# # Add timeout to prevent Render build timeout | |
# start_time = time.time() | |
# MAX_BUILD_TIME = 600 # 10 minutes max for free tier | |
# try: | |
# # Import your vector store utilities | |
# from utils.vector_store import build_vector_store | |
# print("✅ Vector store utilities imported successfully") | |
# for name, doc_path, persist_dir in stores_to_build: | |
# # Check build time limit | |
# elapsed = time.time() - start_time | |
# if elapsed > MAX_BUILD_TIME: | |
# print( | |
# f"⏰ Build time limit reached ({elapsed:.1f}s), creating empty stores") | |
# os.makedirs(persist_dir, exist_ok=True) | |
# continue | |
# print(f"📚 Building {name} (elapsed: {elapsed:.1f}s)...") | |
# # Check if documents exist | |
# if os.path.exists(doc_path): | |
# doc_files = list(Path(doc_path).rglob("*")) | |
# doc_files = [f for f in doc_files if f.is_file( | |
# ) and not f.name.startswith('.')] | |
# if doc_files: | |
# print( | |
# f"📄 Found {len(doc_files)} document(s) for {name}") | |
# try: | |
# build_vector_store( | |
# doc_path=doc_path, | |
# persist_directory=persist_dir | |
# ) | |
# print(f"✅ {name} built successfully") | |
# except Exception as e: | |
# print(f"❌ Error building {name}: {str(e)}") | |
# os.makedirs(persist_dir, exist_ok=True) | |
# else: | |
# print(f"⚠️ No documents found in {doc_path}") | |
# os.makedirs(persist_dir, exist_ok=True) | |
# else: | |
# print(f"⚠️ Document path not found: {doc_path}") | |
# os.makedirs(persist_dir, exist_ok=True) | |
# except ImportError as e: | |
# print(f"❌ Could not import vector store utilities: {e}") | |
# print("📁 Creating empty vector store directories as fallback...") | |
# for name, doc_path, persist_dir in stores_to_build: | |
# os.makedirs(persist_dir, exist_ok=True) | |
# except Exception as e: | |
# print(f"❌ Unexpected error during vector store setup: {e}") | |
# print("📁 Creating empty directories as fallback...") | |
# for name, doc_path, persist_dir in stores_to_build: | |
# os.makedirs(persist_dir, exist_ok=True) | |
# else: | |
# print("✅ All vector stores already exist!") | |
# print("🎉 Vector store setup completed!") | |
# def start_server(): | |
# """Start the FastAPI server""" | |
# print("🌐 Starting FastAPI server...") | |
# try: | |
# # Import your FastAPI app from api/main.py | |
# from api.main import app | |
# print("✅ Successfully imported FastAPI app from api.main") | |
# import uvicorn | |
# # Render uses PORT environment variable | |
# port = int(os.environ.get("PORT", 7860)) | |
# host = "0.0.0.0" | |
# print(f"🚀 Starting server on {host}:{port}") | |
# print(f"🔗 Health check will be available at: /{''}") | |
# # Start the server | |
# uvicorn.run( | |
# app, | |
# host=host, | |
# port=port, | |
# log_level="info", | |
# access_log=True | |
# ) | |
# except ImportError as e: | |
# print(f"❌ Could not import FastAPI app: {e}") | |
# print("📂 Current directory contents:") | |
# for item in os.listdir('.'): | |
# print(f" - {item}") | |
# print("🔍 Looking for api/main.py...") | |
# if os.path.exists('api/main.py'): | |
# print("✅ api/main.py exists") | |
# else: | |
# print("❌ api/main.py not found") | |
# sys.exit(1) | |
# except Exception as e: | |
# print(f"❌ Error starting server: {e}") | |
# sys.exit(1) | |
# if __name__ == "__main__": | |
# print("=" * 60) | |
# print("🎯 RAG Chatbot - Render Deployment") | |
# print("📁 Repository root: rag_app/") | |
# print("=" * 60) | |
# # Setup phase | |
# setup_for_render() | |
# # Server start phase | |
# start_server() | |
# #!/usr/bin/env python3 | |
# """ | |
# HF Spaces deployment launcher for RAG Chatbot | |
# Repository structure: rag_app/ is the git root | |
# """ | |
# import os | |
# import sys | |
# import time | |
# from pathlib import Path | |
# import os | |
# # HF Spaces only allows writing to /tmp | |
# os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf_cache" | |
# os.environ["HF_HOME"] = "/tmp/hf_cache" | |
# os.makedirs("/tmp/hf_cache", exist_ok=True) | |
# print("🚀 Starting HF Spaces deployment setup...") | |
# print(f"📁 Current directory: {os.getcwd()}") | |
# print(f"📂 Contents: {os.listdir('.')}") | |
# # Ensure current directory is in Python path | |
# sys.path.insert(0, os.getcwd()) | |
# # HF Spaces writable path for ephemeral storage | |
# TMP_VECTOR_STORE_ROOT = "/tmp/vector_stores" | |
# def setup_for_spaces(): | |
# """Setup vector stores and environment for HF Spaces""" | |
# print("🔧 Setting up vector stores for HF Spaces...") | |
# # Ensure docs folders exist in repo | |
# required_dirs = [ | |
# "./docs", | |
# "./docs/mes", | |
# "./docs/technical", | |
# "./docs/general" | |
# ] | |
# for directory in required_dirs: | |
# os.makedirs(directory, exist_ok=True) | |
# exists = "✅" if os.path.exists(directory) else "❌" | |
# print(f"{exists} Directory: {directory}") | |
# # Map of vector stores (persist dirs now point to /tmp) | |
# store_configs = [ | |
# ("MES Manual", "docs/mes", os.path.join(TMP_VECTOR_STORE_ROOT, "mes_db")), | |
# ("Technical Docs", "docs/technical", | |
# os.path.join(TMP_VECTOR_STORE_ROOT, "tech_db")), | |
# ("General Docs", "docs/general", | |
# os.path.join(TMP_VECTOR_STORE_ROOT, "general_db")), | |
# ] | |
# stores_to_build = [] | |
# for name, doc_path, persist_dir in store_configs: | |
# # Check if store already exists in repo or in /tmp | |
# if os.path.exists(persist_dir) and os.listdir(persist_dir): | |
# print(f"✅ {name} vector store already exists in {persist_dir}") | |
# else: | |
# stores_to_build.append((name, doc_path, persist_dir)) | |
# print(f"🔧 {name} vector store needs building in {persist_dir}") | |
# # Build missing vector stores | |
# if stores_to_build: | |
# print(f"🏗️ Building {len(stores_to_build)} vector store(s)...") | |
# start_time = time.time() | |
# MAX_BUILD_TIME = 600 # seconds | |
# try: | |
# from utils.vector_store import build_vector_store | |
# print("✅ Vector store utilities imported successfully") | |
# for name, doc_path, persist_dir in stores_to_build: | |
# elapsed = time.time() - start_time | |
# if elapsed > MAX_BUILD_TIME: | |
# print( | |
# f"⏰ Build time limit reached ({elapsed:.1f}s), creating empty store at {persist_dir}") | |
# os.makedirs(persist_dir, exist_ok=True) | |
# continue | |
# if os.path.exists(doc_path): | |
# doc_files = [f for f in Path(doc_path).rglob( | |
# "*") if f.is_file() and not f.name.startswith('.')] | |
# if doc_files: | |
# print( | |
# f"📄 Found {len(doc_files)} document(s) for {name}") | |
# try: | |
# build_vector_store( | |
# doc_path=doc_path, persist_directory=persist_dir) | |
# print(f"✅ {name} built successfully") | |
# except Exception as e: | |
# print(f"❌ Error building {name}: {str(e)}") | |
# os.makedirs(persist_dir, exist_ok=True) | |
# else: | |
# print(f"⚠️ No documents found in {doc_path}") | |
# os.makedirs(persist_dir, exist_ok=True) | |
# else: | |
# print(f"⚠️ Document path not found: {doc_path}") | |
# os.makedirs(persist_dir, exist_ok=True) | |
# except ImportError as e: | |
# print(f"❌ Could not import vector store utilities: {e}") | |
# for _, _, persist_dir in stores_to_build: | |
# os.makedirs(persist_dir, exist_ok=True) | |
# else: | |
# print("✅ All vector stores already exist!") | |
# print("🎉 Vector store setup completed!") | |
# def start_server(): | |
# """Start the FastAPI server""" | |
# print("🌐 Starting FastAPI server...") | |
# try: | |
# from api.main import app | |
# print("✅ Successfully imported FastAPI app from api.main") | |
# import uvicorn | |
# port = int(os.environ.get("PORT", 7860)) | |
# host = "0.0.0.0" | |
# print(f"🚀 Starting server on {host}:{port}") | |
# uvicorn.run(app, host=host, port=port, | |
# log_level="info", access_log=True) | |
# except ImportError as e: | |
# print(f"❌ Could not import FastAPI app: {e}") | |
# sys.exit(1) | |
# except Exception as e: | |
# print(f"❌ Error starting server: {e}") | |
# sys.exit(1) | |
# if __name__ == "__main__": | |
# print("=" * 60) | |
# print("🎯 RAG Chatbot - HF Spaces Deployment") | |
# print("=" * 60) | |
# # Setup phase | |
# setup_for_spaces() | |
# # Server start phase | |
# start_server() | |
# app.py - FastAPI backend with a Gradio UI mounted at /gradio (for Gradio template)
from fastapi import Request | |
import requests | |
from dotenv import load_dotenv | |
from utils.vector_store import get_vector_store | |
from pydantic import BaseModel | |
from fastapi import FastAPI, HTTPException, Request | |
import os | |
import sys | |
import gradio as gr | |
from utils.helpers.chat_mapper import map_answer_to_chat_response | |
from fastapi.middleware.cors import CORSMiddleware | |
# Make the parent of this file's directory importable.
# NOTE(review): this runs after the `utils.*` imports above, so it cannot
# help those imports resolve - confirm whether it is still needed.
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
# Load variables from a .env file before GEMINI_API_KEY is read below.
load_dotenv()
# FastAPI application; the Gradio UI is mounted onto it further down.
app = FastAPI()
# Simplified CORS for debugging: every origin, method and header is allowed,
# with credentials enabled.
# NOTE(review): a wildcard origin combined with allow_credentials=True is very
# permissive - restrict allow_origins before using this beyond debugging.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Vector store mapping for different domains.
# Every key except "default" doubles as a query prefix recognised by
# load_vector_store_by_prefix (e.g. "mes: ..."). "default" is the fallback
# used when no prefix matches and points at the same directory as "general".
VECTOR_STORE_PATHS = {
    "mes": "./vector_stores/mes_db",
    "technical": "./vector_stores/tech_db",
    "general": "./vector_stores/general_db",
    "default": "./vector_stores/general_db",
}
# Request body schema for the question endpoint: a JSON object with a single
# string field. ask_question parses the raw body into this model.
class QueryRequest(BaseModel):
    query: str  # the user's question, optionally starting with "<prefix>:"
# Gemini API setup.
# The key is read once at import time; importing this module fails with
# ValueError when the variable is missing or empty.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
if not GEMINI_API_KEY:
    raise ValueError("GEMINI_API_KEY environment variable required")
# REST endpoint used by generate_answer_with_gemini (gemini-2.0-flash,
# generateContent method).
GEMINI_API_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent"
# Vector store loader | |
def load_vector_store_by_prefix(query: str):
    """Pick the vector store addressed by an optional ``<prefix>:`` in *query*.

    The prefix is matched case-insensitively against the keys of
    ``VECTOR_STORE_PATHS`` (the ``"default"`` entry is never treated as a
    prefix), ignoring whitespace around the query.

    Args:
        query: Raw user question, e.g. ``"mes: How do I ...?"``.

    Returns:
        A ``(vector_store, cleaned_query, store_key)`` tuple. When a prefix
        matched, ``cleaned_query`` is the question with the prefix removed and
        surrounding whitespace stripped, in the user's original casing.
        Otherwise ``query`` is returned unchanged and ``store_key`` is
        ``"default"``.
    """
    stripped = query.strip()
    for prefix, path in VECTOR_STORE_PATHS.items():
        if prefix == "default":
            continue
        marker = f"{prefix}:"
        # Lowercase only the leading slice for the comparison so the rest of
        # the question keeps its casing (acronyms, identifiers, names) instead
        # of being lowercased as a side effect of prefix detection.
        if stripped[:len(marker)].lower() == marker:
            cleaned_query = stripped[len(marker):].strip()
            return get_vector_store(persist_directory=path), cleaned_query, prefix
    return (
        get_vector_store(persist_directory=VECTOR_STORE_PATHS["default"]),
        query,
        "default",
    )
def generate_answer_with_gemini(query: str, context_docs: list):
    """Ask Gemini to answer *query* from the supplied context documents.

    Args:
        query: The user's question (any store prefix already removed).
        context_docs: Retrieved documents; each must expose ``page_content``.

    Returns:
        The model's answer text. Failures are reported in-band as a string
        instead of being raised: ``"API Error: ..."`` for a non-200 response,
        ``"Error: ..."`` for an exception during the call, and
        ``"I couldn't generate an answer."`` when the response holds no text.
    """
    # Build context string
    knowledge_base = "\n\n".join(
        f"Data Source {i}: {doc.page_content.strip()}"
        for i, doc in enumerate(context_docs, 1)
    )
    # The updated prompt is more direct and forceful
    prompt = (
        "You are an expert AI assistant that uses a provided knowledge base to answer questions. "
        "Your responses must always be based on this knowledge base, which is the ultimate source of truth. "
        "You will only use your internal knowledge to supplement the answer, never to contradict it. "
        "If and only if the knowledge base contains absolutely nothing relevant to the user's question, "
        "you will respond with a polite and concise statement saying you cannot answer the question from the information you have. "
        "You must never answer 'I don't know' if there is any information in the knowledge base that is even tangentially related to the question. "
        "Always try your best to construct a useful answer by synthesizing the provided information. "
        "Do not refer to the 'knowledge base' or 'sources' directly; instead, use phrases like 'based on the information I have'.\n\n"
        f"My knowledge base:\n{knowledge_base}\n\n"
        f"User's Question: {query}\n\nAnswer:"
    )
    # print the prompt for debugging
    print("Prompt sent to Gemini API:", prompt)
    try:
        response = requests.post(
            GEMINI_API_URL,
            # The key travels in a header rather than a ``?key=`` query
            # parameter: connection errors embed the request URL in their
            # message, and that message is returned to the caller below.
            headers={"x-goog-api-key": GEMINI_API_KEY},
            json={
                "contents": [
                    {
                        "role": "user",
                        "parts": [
                            {"text": prompt}
                        ],
                    }
                ],
                "generationConfig": {
                    "temperature": 0.7,
                    "maxOutputTokens": 300,
                },
            },
            timeout=300,
        )
        if response.status_code != 200:
            return f"API Error: {response.status_code} - {response.text}"
        data = response.json()
        # Extract answer text. ``or [{}]`` also covers keys that are present
        # but hold an empty list, which would otherwise raise IndexError.
        candidates = data.get("candidates") or [{}]
        parts = candidates[0].get("content", {}).get("parts") or [{}]
        text = parts[0].get("text", "").strip()
        return text or "I couldn't generate an answer."
    except Exception as e:
        # Deliberate catch-all: callers expect a string, never an exception.
        return f"Error: {str(e)}"
# Middleware for logging requests | |
async def log_requests(request: Request, call_next):
    """Print each request's method, URL and headers, then the response status.

    Uses the ``(request, call_next)`` signature of an HTTP middleware.
    NOTE(review): no registration (e.g. a middleware decorator) is visible on
    this function in this view - confirm it is actually attached to the app.

    Args:
        request: The incoming request.
        call_next: Awaitable callable that produces the response for
            ``request``.

    Returns:
        The response returned by ``call_next``, unmodified.
    """
    # Credential-bearing headers are masked so secrets do not end up in logs.
    sensitive = {
        "authorization",
        "proxy-authorization",
        "cookie",
        "x-api-key",
        "x-goog-api-key",
    }
    headers = {
        name: ("<redacted>" if name.lower() in sensitive else value)
        for name, value in dict(request.headers).items()
    }
    print(f"Request: {request.method} {request.url}")
    print(f"Headers: {headers}")
    print(f"Origin: {request.headers.get('origin', 'No Origin')}")
    print(f"User-Agent: {request.headers.get('user-agent', 'No User-Agent')}")
    response = await call_next(request)
    print(f"Response Status: {response.status_code}")
    return response
# NEW: Gradio interface function | |
def gradio_chat_interface(query: str) -> str:
    """Answer *query* for the Gradio UI and return the result as Markdown.

    Selects a vector store from the query's optional prefix, retrieves
    documents with MMR search, drops documents whose stripped content is a
    duplicate, keeps at most five, and asks Gemini for an answer.

    Args:
        query: Text from the question box, optionally starting with a store
            prefix such as ``"mes:"``.

    Returns:
        Markdown containing an "Answer" section followed by a "Sources"
        section, or a short plain message when the query is blank, the store
        is unavailable or nothing relevant was found. Exceptions are not
        raised; they are rendered into the returned Markdown.
    """
    # A missing (None) or blank query is handled up front instead of falling
    # into the generic error branch below.
    if not query or not query.strip():
        return "Please enter a question."
    try:
        print(f"Gradio query: {query}")
        # Use your existing logic
        vector_store, cleaned_query, store_key = load_vector_store_by_prefix(
            query)
        if not vector_store:
            return "Vector store not ready. Please try again later."
        retriever = vector_store.as_retriever(
            search_type="mmr",
            search_kwargs={
                "k": 6,
                "fetch_k": 20,
                "lambda_mult": 0.5
            }
        )
        docs = retriever.get_relevant_documents(cleaned_query)
        # Deduplicate on stripped content, keeping first occurrences in order
        seen = set()
        unique_docs = []
        for doc in docs:
            snippet = doc.page_content.strip()
            if snippet not in seen:
                seen.add(snippet)
                unique_docs.append(doc)
        docs = unique_docs[:5]
        if not docs:
            return "I couldn't find any relevant information in the knowledge base to answer your question."
        answer = generate_answer_with_gemini(cleaned_query, docs)
        # Format response for Gradio with better markdown
        sections = [f"## Answer\n\n{answer}\n\n", "## Sources\n\n"]
        for i, doc in enumerate(docs, 1):
            source_name = doc.metadata.get('source', 'Unknown Source')
            page = doc.metadata.get('page')
            # Test for absence rather than truthiness: page 0 is a valid page
            # number (some loaders count pages from zero) and must be shown.
            page_info = f" (Page {page})" if page not in (None, "") else ""
            content = doc.page_content
            preview = content[:400] + "..." if len(content) > 400 else content
            sections.append(
                f"### {i}. {source_name}{page_info}\n\n{preview}\n\n---\n\n")
        return "".join(sections)
    except Exception as e:
        # UI boundary: show the failure to the user instead of raising.
        error_msg = f"**Error occurred:**\n\n```\n{str(e)}\n```"
        print(f"Gradio error: {e}")
        return error_msg
# Create Gradio interface | |
def create_gradio_interface():
    """Build the Gradio Blocks UI for the chatbot.

    The layout is a header, a question box with "Ask Question" and "Clear"
    buttons, a Markdown output area, a set of example questions and a footer
    with tips. Both the button click and pressing Enter in the textbox call
    ``gradio_chat_interface``.

    Returns:
        The configured ``gr.Blocks`` instance (not launched).
    """
    # ``description`` is a gr.Interface option, not a gr.Blocks parameter, so
    # it is not passed here; the same text is shown by the Markdown header.
    with gr.Blocks(
        title="RAG Chatbot",
        theme='soft',
    ) as interface:
        gr.Markdown("""
# RAG Chatbot
Ask questions about your knowledge base and get detailed answers with sources.
**Available Knowledge:**
- MES Manual documentation (prefix with "mes:")
- Technical documentation (prefix with "technical:")
- General documentation (prefix with "general:" or no prefix)
""")
        with gr.Row():
            with gr.Column(scale=4):
                query_input = gr.Textbox(
                    label="Your Question",
                    placeholder="Enter your question here... (e.g., 'What is machine learning?' or 'mes: How does the system work?')",
                    lines=3,
                    max_lines=10
                )
            with gr.Column(scale=1):
                submit_btn = gr.Button(
                    "Ask Question", variant="primary", size="lg")
                clear_btn = gr.Button("Clear", variant="secondary")
        answer_output = gr.Markdown(
            label="Answer & Sources",
            value="Welcome! Ask a question above to get started."
        )
        # Event handlers
        submit_btn.click(
            gradio_chat_interface,
            inputs=[query_input],
            outputs=[answer_output]
        )
        query_input.submit(  # Allow Enter key to submit
            gradio_chat_interface,
            inputs=[query_input],
            outputs=[answer_output]
        )
        # Clear resets the textbox and restores the welcome message.
        clear_btn.click(
            lambda: ("", "Welcome! Ask a question above to get started."),
            outputs=[query_input, answer_output]
        )
        # Example questions
        gr.Examples(
            examples=[
                ["What is machine learning and how does it work?"],
                ["mes: How does the MES system handle production data?"],
                ["technical: Explain the database architecture"],
                ["What are the main components of the system?"],
                ["How do I configure the application settings?"]
            ],
            inputs=[query_input],
            label="Example Questions"
        )
        gr.Markdown("""
---
**Tips:**
- Use prefixes (mes:, technical:, general:) to search specific knowledge bases
- Be specific with your questions for better results
- Sources are provided with each answer for verification
**Technical Info:**
- Powered by FastAPI backend
- Vector search with MMR retrieval
- Gemini 2.0 Flash for answer generation
""")
    return interface
# API Endpoints | |
def root():
    """Return a fixed status payload describing the running service.

    NOTE(review): no route decorator is visible on this function in this
    view - confirm how it is exposed.
    """
    status_payload = dict(
        status="running",
        model="gemini-2.0-flash",
        using_direct_api=True,
        client_ready=True,
        gradio_interface="/gradio",
    )
    return status_payload
async def ask_question(request: Request):
    """Answer a question posted as JSON matching ``QueryRequest``.

    Reads the raw body, validates it, selects a vector store from the query's
    optional prefix, retrieves documents with MMR search, drops documents
    whose stripped content is a duplicate, keeps at most five, and asks
    Gemini for an answer.

    NOTE(review): no route decorator is visible on this function in this
    view - confirm how it is exposed.

    Args:
        request: The incoming request; its body must be a JSON object with a
            string ``query`` field.

    Returns:
        When nothing relevant is retrieved, a dict with ``answer``,
        ``model_used``, ``vector_store_used`` and an empty ``sources`` list.
        Otherwise the result of ``map_answer_to_chat_response`` applied to
        the same structure with populated sources.

    Raises:
        HTTPException: 422 when the body fails validation; 500 when the
            vector store is not ready or any other error occurs.
    """
    # Local import: keeps this handler self-contained without touching the
    # module's import block (pydantic is already a dependency of this file).
    from pydantic import ValidationError

    try:
        # Print raw incoming request body
        raw_body = await request.body()
        print("Incoming POST request body:")
        # errors="replace": the debug print must not fail on bytes that are
        # not valid UTF-8; validation below rejects such bodies properly.
        print(raw_body.decode("utf-8", errors="replace"))
        # Parse into your Pydantic model
        parsed_request = QueryRequest.model_validate_json(raw_body)
        print("Parsed request object:", parsed_request)
        vector_store, cleaned_query, store_key = load_vector_store_by_prefix(
            parsed_request.query
        )
        if not vector_store:
            raise HTTPException(
                status_code=500, detail="Vector store not ready"
            )
        retriever = vector_store.as_retriever(
            search_type="mmr",
            search_kwargs={
                "k": 6,
                "fetch_k": 20,
                "lambda_mult": 0.5
            }
        )
        docs = retriever.get_relevant_documents(cleaned_query)
        # Deduplicate on stripped content, keeping first occurrences in order
        seen = set()
        unique_docs = []
        for doc in docs:
            snippet = doc.page_content.strip()
            if snippet not in seen:
                seen.add(snippet)
                unique_docs.append(doc)
        docs = unique_docs[:5]
        if not docs:
            return {
                "answer": "I couldn't find any relevant information in the knowledge base to answer your question.",
                "model_used": "gemini-2.0-flash",
                "vector_store_used": VECTOR_STORE_PATHS[store_key],
                "sources": []
            }
        answer = generate_answer_with_gemini(cleaned_query, docs)
        answer_obj = {
            "answer": answer,
            "model_used": "gemini-2.0-flash",
            "vector_store_used": VECTOR_STORE_PATHS[store_key],
            "sources": [
                {
                    "content": doc.page_content[:500] + "...\n",
                    "metadata": doc.metadata
                }
                for doc in docs
            ]
        }
        return map_answer_to_chat_response(answer_obj)
    except HTTPException:
        # HTTP errors raised on purpose above keep their own status and
        # detail instead of being re-wrapped by the generic handler.
        raise
    except ValidationError as e:
        # A malformed body is the client's mistake, not a server failure.
        print(f"Invalid request body in ask_question: {e}")
        raise HTTPException(
            status_code=422,
            detail="Invalid request body: expected a JSON object with a string 'query' field",
        ) from e
    except Exception as e:
        print(f"Error in ask_question: {e}")
        raise HTTPException(status_code=500, detail=f"Error: {str(e)}") from e
# Create the Gradio interface (built once, at import time)
demo = create_gradio_interface()
# Mount Gradio on FastAPI at /gradio; the value returned by
# mount_gradio_app replaces the module-level `app` that uvicorn serves.
app = gr.mount_gradio_app(app, demo, path="/gradio")
# Add a redirect for convenience | |
async def redirect_to_gradio():
    """Return a redirect response that points the client at /gradio.

    NOTE(review): the original docstring describes this as the /ui redirect,
    but no route decorator is visible in this view - confirm how it is
    exposed.
    """
    from fastapi.responses import RedirectResponse

    target = "/gradio"
    return RedirectResponse(url=target)
# Health check endpoint | |
def health_check():
    """Return a fixed payload reporting the service as healthy.

    The values are constants; nothing is probed at call time.
    """
    report = {"status": "healthy"}
    report["gradio_mounted"] = True
    return report
if __name__ == "__main__":
    # Direct execution: serve the combined FastAPI + Gradio app with uvicorn.
    import uvicorn
    # Use the PORT environment variable when set, otherwise 8000.
    port = int(os.environ.get("PORT", 8000))
    # 0.0.0.0 binds on all interfaces.
    uvicorn.run(app, host="0.0.0.0", port=port)