from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse
import gradio as gr
import os
import json
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
import requests
import nest_asyncio
import uvicorn

# ✅ Load RAG-related files
with open("texts.json", "r", encoding="utf-8") as f:
    texts = json.load(f)

index = faiss.read_index("faiss_index.bin")
embed_model = SentenceTransformer("all-MiniLM-L6-v2")

API_KEY = os.environ.get("OPENROUTER_API_KEY")
MODEL = "qwen/qwq-32b:free"

# ✅ FastAPI app
app = FastAPI()

# ✅ Context retriever with similarity threshold
def get_context(query, top_k=5, threshold=0.3):
    query_vec = embed_model.encode([query])
    # FAISS expects float32 input
    D, I = index.search(np.asarray(query_vec, dtype="float32"), top_k)

    # Keep only hits that clear the score threshold. This assumes an
    # inner-product (cosine-style) index where higher scores mean closer
    # matches; for an L2 index the test would be d <= threshold instead.
    # i == -1 marks padding FAISS returns when fewer than top_k hits exist.
    results = [(texts[i], d) for i, d in zip(I[0], D[0]) if i != -1 and d >= threshold]
    if not results:
        return ""
    return "\n".join(text for text, _ in results)

# ✅ Chat function (RAG-aware + fallback)
def chat_fn(message, history):
    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json"
    }

    context = get_context(message)

    # No valid context? Respond with the fallback message.
    if not context.strip():
        return "❌ Sorry, I can't answer that."

    messages = [
        {
            "role": "system",
            "content": (
                "You are a helpful assistant. Only use the context below to answer. "
                "If you can't find an answer in it, reply: "
                "'Sorry, I can't answer that.'\n\nContext:\n" + context
            )
        }
    ]

    # Replay prior turns as (user, assistant) pairs
    for user, assistant in history:
        messages.append({"role": "user", "content": user})
        messages.append({"role": "assistant", "content": assistant})

    messages.append({"role": "user", "content": message})

    payload = {"model": MODEL, "messages": messages}

    try:
        response = requests.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=60,  # avoid hanging indefinitely on a stalled request
        )
        response.raise_for_status()
        reply = response.json()["choices"][0]["message"]["content"]
    except Exception as e:
        reply = f"❌ Error: {e}"

    return reply

# ✅ FastAPI POST endpoint
@app.post("/chat")
async def chat_api(request: Request):
    body = await request.json()
    message = body.get("message", "")
    history = body.get("history", [])
    response = chat_fn(message, history)
    return JSONResponse(content={"response": response})

# ✅ Gradio interface
demo = gr.ChatInterface(
    fn=chat_fn,
    title="CODEX MIRXA KAMRAN",
    description="Chat with AI MODEL trained by Mirxa Kamran",
    theme="soft",
)

# ✅ Mount Gradio at root
app = gr.mount_gradio_app(app, demo, path="/")

# ✅ For local development
if __name__ == "__main__":
    nest_asyncio.apply()
    uvicorn.run(app, host="0.0.0.0", port=7860)
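
# ✅ Index-building sketch (illustrative only; kept commented out so it never
# runs with the app). texts.json and faiss_index.bin are loaded above but
# built elsewhere. One way they could be produced is an inner-product FAISS
# index over normalized embeddings, so search scores behave like cosine
# similarity and the d >= threshold filter in get_context is meaningful.
# The example chunks below are placeholders, not part of the original app.
#
#   import json, faiss
#   from sentence_transformers import SentenceTransformer
#
#   chunks = ["first document chunk...", "second document chunk..."]
#   model = SentenceTransformer("all-MiniLM-L6-v2")
#   vecs = model.encode(chunks, normalize_embeddings=True)  # unit-length rows
#   index = faiss.IndexFlatIP(vecs.shape[1])  # inner product == cosine here
#   index.add(vecs.astype("float32"))
#   faiss.write_index(index, "faiss_index.bin")
#   with open("texts.json", "w", encoding="utf-8") as f:
#       json.dump(chunks, f, ensure_ascii=False)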
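
# ✅ Usage sketch (illustrative only; commented out so it never runs with the
# app). A minimal example of calling the /chat endpoint once the server is up
# on port 7860. "history" uses the same shape chat_fn iterates over: a list
# of [user, assistant] pairs. The example message is a placeholder.
#
#   import requests
#
#   resp = requests.post(
#       "http://localhost:7860/chat",
#       json={"message": "Summarize the indexed documents.", "history": []},
#       timeout=60,
#   )
#   print(resp.json()["response"])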