# app.py - Pure Gradio approach (for Gradio template)
"""FastAPI application with a mounted Gradio UI for the RAG chatbot.

The module exposes:

* ``GET  /``        - status document.
* ``POST /``        - JSON question answering (body: ``{"query": "..."}``).
* ``GET  /gradio``  - interactive Gradio interface.
* ``GET  /ui``      - redirect to ``/gradio``.
* ``GET  /health``  - health check.

A query may start with ``mes:``, ``technical:`` or ``general:`` to select the
vector store to search; otherwise the default store is used.

NOTE: two earlier launcher scripts (Render and HF Spaces) that were kept at the
top of this file as commented-out code have been removed; recover them from
version control if they are ever needed again.
"""
import os
import sys

import gradio as gr
import requests
from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import RedirectResponse
from pydantic import BaseModel, ValidationError

# Add the parent of this file's directory to the import path *before* the
# project-local imports below, so that the path tweak can actually help them.
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from utils.vector_store import get_vector_store  # noqa: E402
from utils.helpers.chat_mapper import map_answer_to_chat_response  # noqa: E402

load_dotenv()

app = FastAPI()

# Simplified CORS for debugging.
# NOTE(review): wildcard origins combined with allow_credentials=True is very
# permissive - restrict allow_origins before exposing this publicly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Vector store mapping for different domains
VECTOR_STORE_PATHS = {
    "mes": "./vector_stores/mes_db",
    "technical": "./vector_stores/tech_db",
    "general": "./vector_stores/general_db",
    "default": "./vector_stores/general_db",
}


class QueryRequest(BaseModel):
    """Request body accepted by ``POST /``."""

    query: str


# Gemini API setup
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
if not GEMINI_API_KEY:
    raise ValueError("GEMINI_API_KEY environment variable required")

GEMINI_MODEL = "gemini-2.0-flash"
GEMINI_API_URL = (
    "https://generativelanguage.googleapis.com/v1beta/models/"
    f"{GEMINI_MODEL}:generateContent"
)

# Shared user-facing messages.
NO_CONTEXT_ANSWER = (
    "I couldn't find any relevant information in the knowledge base "
    "to answer your question."
)
WELCOME_MESSAGE = "Welcome! Ask a question above to get started."

# Header names whose values must never be written to the logs.
_SENSITIVE_HEADERS = frozenset(
    {"authorization", "proxy-authorization", "cookie", "x-api-key", "x-goog-api-key"}
)

_INTRO_MARKDOWN = """
# RAG Chatbot

Ask questions about your knowledge base and get detailed answers with sources.

**Available Knowledge:**
- MES Manual documentation (prefix with "mes:")
- Technical documentation (prefix with "technical:")
- General documentation (prefix with "general:" or no prefix)
"""

_TIPS_MARKDOWN = """
---

**Tips:**
- Use prefixes (mes:, technical:, general:) to search specific knowledge bases
- Be specific with your questions for better results
- Sources are provided with each answer for verification

**Technical Info:**
- Powered by FastAPI backend
- Vector search with MMR retrieval
- Gemini 2.0 Flash for answer generation
"""


# Vector store loader
def load_vector_store_by_prefix(query: str):
    """Pick the vector store addressed by an optional ``<prefix>:`` in *query*.

    The prefix match is case-insensitive, but the returned query keeps the
    user's original casing (only the prefix and surrounding whitespace are
    removed) so that acronyms and proper nouns survive.

    Returns:
        A ``(vector_store, cleaned_query, store_key)`` tuple, where
        ``store_key`` is a key of ``VECTOR_STORE_PATHS``. Without a recognised
        prefix the query is returned unchanged with the ``"default"`` key.
    """
    stripped = query.strip()
    for prefix, path in VECTOR_STORE_PATHS.items():
        if prefix == "default":
            continue
        marker = f"{prefix}:"
        if stripped[:len(marker)].lower() == marker:
            cleaned_query = stripped[len(marker):].strip()
            return get_vector_store(persist_directory=path), cleaned_query, prefix
    return (
        get_vector_store(persist_directory=VECTOR_STORE_PATHS["default"]),
        query,
        "default",
    )


def _extract_gemini_text(data: dict) -> str:
    """Return the first candidate's text from a Gemini response, or ``""``.

    Tolerates missing keys as well as empty ``candidates`` / ``parts`` lists
    (e.g. when the response was blocked), which would otherwise raise
    ``IndexError``.
    """
    candidates = data.get("candidates") or [{}]
    content = candidates[0].get("content") or {}
    parts = content.get("parts") or [{}]
    return (parts[0].get("text") or "").strip()


def generate_answer_with_gemini(query: str, context_docs: list) -> str:
    """Ask Gemini to answer *query* using *context_docs* as the knowledge base.

    Args:
        query: The user's question (without any store prefix).
        context_docs: Retrieved documents; each must expose ``page_content``.

    Returns:
        The generated answer. On an API or transport failure a human-readable
        error string is returned instead; this function does not raise.
    """
    # Build context string
    knowledge_base = "\n\n".join(
        f"Data Source {i}: {doc.page_content.strip()}"
        for i, doc in enumerate(context_docs, 1)
    )

    # The updated prompt is more direct and forceful
    prompt = (
        "You are an expert AI assistant that uses a provided knowledge base to answer questions. "
        "Your responses must always be based on this knowledge base, which is the ultimate source of truth. "
        "You will only use your internal knowledge to supplement the answer, never to contradict it. "
        "If and only if the knowledge base contains absolutely nothing relevant to the user's question, "
        "you will respond with a polite and concise statement saying you cannot answer the question from the information you have. "
        "You must never answer 'I don't know' if there is any information in the knowledge base that is even tangentially related to the question. "
        "Always try your best to construct a useful answer by synthesizing the provided information. "
        "Do not refer to the 'knowledge base' or 'sources' directly; instead, use phrases like 'based on the information I have'.\n\n"
        f"My knowledge base:\n{knowledge_base}\n\n"
        f"User's Question: {query}\n\nAnswer:"
    )

    # print the prompt for debugging
    print("Prompt sent to Gemini API:", prompt)

    try:
        response = requests.post(
            GEMINI_API_URL,
            # Send the key as a header rather than in the URL: exception
            # messages from `requests` include the URL and are returned to the
            # caller below, which would otherwise leak the key.
            headers={"x-goog-api-key": GEMINI_API_KEY},
            json={
                "contents": [
                    {
                        "role": "user",
                        "parts": [
                            {"text": prompt}
                        ],
                    }
                ],
                "generationConfig": {
                    "temperature": 0.7,
                    "maxOutputTokens": 300,
                },
            },
            timeout=300,
        )

        if response.status_code != 200:
            return f"API Error: {response.status_code} - {response.text}"

        # Extract answer text
        return _extract_gemini_text(response.json()) or "I couldn't generate an answer."

    except Exception as e:
        # Boundary handler: callers rely on always getting a string back.
        return f"Error: {str(e)}"


def _retrieve_unique_docs(vector_store, query: str, limit: int = 5) -> list:
    """Run an MMR search for *query* and return up to *limit* distinct documents.

    Documents whose stripped ``page_content`` is identical are collapsed to the
    first occurrence, preserving retrieval order.
    """
    retriever = vector_store.as_retriever(
        search_type="mmr",
        search_kwargs={
            "k": 6,
            "fetch_k": 20,
            "lambda_mult": 0.5,
        },
    )
    docs = retriever.get_relevant_documents(query)

    # Deduplicate
    seen = set()
    unique_docs = []
    for doc in docs:
        snippet = doc.page_content.strip()
        if snippet not in seen:
            seen.add(snippet)
            unique_docs.append(doc)
    return unique_docs[:limit]


# Middleware for logging requests
@app.middleware("http")
async def log_requests(request: Request, call_next):
    """Log method, URL, headers and response status of every HTTP request.

    Values of credential-bearing headers are replaced with ``<redacted>``.
    """
    safe_headers = {
        name: ("<redacted>" if name.lower() in _SENSITIVE_HEADERS else value)
        for name, value in request.headers.items()
    }
    print(f"Request: {request.method} {request.url}")
    print(f"Headers: {safe_headers}")
    print(f"Origin: {request.headers.get('origin', 'No Origin')}")
    print(f"User-Agent: {request.headers.get('user-agent', 'No User-Agent')}")

    response = await call_next(request)

    print(f"Response Status: {response.status_code}")
    return response


# NEW: Gradio interface function
def gradio_chat_interface(query: str) -> str:
    """Answer *query* and format the result as Markdown for the Gradio UI.

    Uses the same retrieval and generation pipeline as ``POST /``. Never
    raises: any failure is rendered as a Markdown error message.
    """
    try:
        if not query or not query.strip():
            return "Please enter a question."

        print(f"Gradio query: {query}")

        vector_store, cleaned_query, store_key = load_vector_store_by_prefix(query)

        # A bare prefix such as "mes:" leaves nothing to search for.
        if not cleaned_query.strip():
            return "Please enter a question."

        if not vector_store:
            return "Vector store not ready. Please try again later."

        docs = _retrieve_unique_docs(vector_store, cleaned_query)

        if not docs:
            return NO_CONTEXT_ANSWER

        answer = generate_answer_with_gemini(cleaned_query, docs)

        # Format response for Gradio with better markdown
        sections = [f"## Answer\n\n{answer}\n\n", "## Sources\n\n"]
        for i, doc in enumerate(docs, 1):
            source_name = doc.metadata.get('source', 'Unknown Source')
            page = doc.metadata.get('page', '')
            page_info = f" (Page {page})" if page else ""
            content = doc.page_content
            preview = content[:400] + "..." if len(content) > 400 else content
            sections.append(
                f"### {i}. {source_name}{page_info}\n\n{preview}\n\n---\n\n"
            )
        return "".join(sections)

    except Exception as e:
        # Boundary handler: the UI must always receive a string to display.
        print(f"Gradio error: {e}")
        return f"**Error occurred:**\n\n```\n{str(e)}\n```"


# Create Gradio interface
def create_gradio_interface():
    """Create and configure the Gradio interface.

    Returns:
        The ``gr.Blocks`` instance, ready to be mounted on the FastAPI app.
    """
    # `description` is a gr.Interface option, not a gr.Blocks one; the same
    # text is shown through the Markdown header instead.
    with gr.Blocks(
        title="RAG Chatbot",
        theme='soft',
    ) as interface:

        gr.Markdown(_INTRO_MARKDOWN)

        with gr.Row():
            with gr.Column(scale=4):
                query_input = gr.Textbox(
                    label="Your Question",
                    placeholder="Enter your question here... (e.g., 'What is machine learning?' or 'mes: How does the system work?')",
                    lines=3,
                    max_lines=10,
                )
            with gr.Column(scale=1):
                submit_btn = gr.Button(
                    "Ask Question", variant="primary", size="lg")
                clear_btn = gr.Button("Clear", variant="secondary")

        answer_output = gr.Markdown(
            label="Answer & Sources",
            value=WELCOME_MESSAGE,
        )

        # Event handlers
        submit_btn.click(
            gradio_chat_interface,
            inputs=[query_input],
            outputs=[answer_output],
        )

        query_input.submit(  # Allow Enter key to submit
            gradio_chat_interface,
            inputs=[query_input],
            outputs=[answer_output],
        )

        clear_btn.click(
            lambda: ("", WELCOME_MESSAGE),
            outputs=[query_input, answer_output],
        )

        # Example questions
        gr.Examples(
            examples=[
                ["What is machine learning and how does it work?"],
                ["mes: How does the MES system handle production data?"],
                ["technical: Explain the database architecture"],
                ["What are the main components of the system?"],
                ["How do I configure the application settings?"],
            ],
            inputs=[query_input],
            label="Example Questions",
        )

        gr.Markdown(_TIPS_MARKDOWN)

    return interface


# API Endpoints
@app.get("/")
def root():
    """Return a static status document describing the service."""
    return {
        "status": "running",
        "model": GEMINI_MODEL,
        "using_direct_api": True,
        "client_ready": True,
        "gradio_interface": "/gradio",
    }


@app.post("/")
async def ask_question(request: Request):
    """Answer the question contained in the JSON request body.

    Raises:
        HTTPException: 422 for a body that is not a valid ``QueryRequest``,
            400 for an empty question, 500 for any failure while answering.
    """
    # Print raw incoming request body (decoded leniently: logging must not be
    # the thing that fails on malformed bytes).
    raw_body = await request.body()
    print("Incoming POST request body:")
    print(raw_body.decode("utf-8", errors="replace"))

    # Parse into the Pydantic model; a malformed body is a client error.
    try:
        parsed_request = QueryRequest.model_validate_json(raw_body)
    except ValidationError as exc:
        raise HTTPException(
            status_code=422, detail=f"Invalid request body: {exc}"
        ) from exc
    print("Parsed request object:", parsed_request)

    try:
        vector_store, cleaned_query, store_key = load_vector_store_by_prefix(
            parsed_request.query
        )

        if not cleaned_query.strip():
            raise HTTPException(status_code=400, detail="Query must not be empty")

        if not vector_store:
            raise HTTPException(
                status_code=500,
                detail="Vector store not ready"
            )

        docs = _retrieve_unique_docs(vector_store, cleaned_query)

        if not docs:
            return {
                "answer": NO_CONTEXT_ANSWER,
                "model_used": GEMINI_MODEL,
                "vector_store_used": VECTOR_STORE_PATHS[store_key],
                "sources": [],
            }

        answer = generate_answer_with_gemini(cleaned_query, docs)

        answer_obj = {
            "answer": answer,
            "model_used": GEMINI_MODEL,
            "vector_store_used": VECTOR_STORE_PATHS[store_key],
            "sources": [
                {
                    "content": doc.page_content[:500] + "...\n",
                    "metadata": doc.metadata,
                }
                for doc in docs
            ],
        }

        return map_answer_to_chat_response(answer_obj)

    except HTTPException:
        # Deliberate HTTP errors raised above must reach the client untouched.
        raise
    except Exception as e:
        print(f"Error in ask_question: {e}")
        raise HTTPException(status_code=500, detail=f"Error: {str(e)}") from e


# Create the Gradio interface
demo = create_gradio_interface()

# Mount Gradio on FastAPI at /gradio
app = gr.mount_gradio_app(app, demo, path="/gradio")


# Add a redirect for convenience
@app.get("/ui")
async def redirect_to_gradio():
    """Redirect /ui to /gradio for easier access"""
    return RedirectResponse(url="/gradio")


# Health check endpoint
@app.get("/health")
def health_check():
    """Report that the service is up and the Gradio UI is mounted."""
    return {"status": "healthy", "gradio_mounted": True}


if __name__ == "__main__":
    import uvicorn

    port = int(os.environ.get("PORT", 8000))
    uvicorn.run(app, host="0.0.0.0", port=port)