from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse
import gradio as gr
import os
import re
import json
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
import requests
import nest_asyncio
import uvicorn

# ✅ Load documents and FAISS index
with open("texts.json", "r", encoding="utf-8") as f:
    texts = json.load(f)

index = faiss.read_index("faiss_index.bin")
embed_model = SentenceTransformer("all-MiniLM-L6-v2")

# ✅ API keys and config
API_KEY = os.environ.get("OPENROUTER_API_KEY")
MODEL = "nousresearch/deephermes-3-llama-3-8b-preview:free"  # Updated model

app = FastAPI()

# ✅ Greeting checker with added variations
def is_greeting(text):
    greetings = [
        "hi", "hello", "hey", "good morning", "good afternoon",
        "good evening", "howdy", "yo", "sup", "greetings",
        "what's up", "hey there"
    ]
    # Match whole words/phrases so e.g. "this" doesn't falsely trigger on "hi"
    text_lower = text.lower()
    return any(re.search(rf"\b{re.escape(g)}\b", text_lower) for g in greetings)

# ✅ Context fetcher
def get_context(query, top_k=5, threshold=0.3):
    query_vec = embed_model.encode([query])
    # Note: the threshold check assumes a similarity-scored index (e.g. IndexFlatIP
    # over normalized embeddings) where higher is better; for an L2-distance index
    # the comparison would need to be inverted.
    D, I = index.search(np.array(query_vec, dtype=np.float32), top_k)
    matches = [(texts[i], d) for i, d in zip(I[0], D[0]) if d >= threshold]
    if not matches:
        return ""
    return "\n".join(text for text, _ in matches)

# ✅ Core chat function
def chat_fn(message, history):
    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json"
    }

    context = get_context(message)

    # ✅ Handle greetings when no context is found
    if not context.strip():
        if is_greeting(message):
            return "👋 Hey there! How can I help you today?"
        return "🤔 Hmm, I'm not sure about that. Could you rephrase your question?"

    # ✅ Construct prompt with a friendly, natural tone
    messages = [
        {
            "role": "system",
            "content": (
                "You are a friendly, helpful assistant."
                " Use the following context to answer questions naturally and clearly."
                " Be conversational and concise. Avoid saying you're using context."
                " If unsure, say: 'I'm not sure about that. Could you rephrase?'"
                "\n\nContext:\n" + context
            )
        }
    ]

    # History arrives as (user, assistant) pairs from gr.ChatInterface
    for user, assistant in history:
        messages.append({"role": "user", "content": user})
        messages.append({"role": "assistant", "content": assistant})

    messages.append({"role": "user", "content": message + "\n\nKeep your reply short and natural."})

    payload = {
        "model": MODEL,
        "messages": messages,
        "max_tokens": 200,
    }

    try:
        response = requests.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=30
        )
        response.raise_for_status()
        reply = response.json()["choices"][0]["message"]["content"]
    except Exception as e:
        reply = f"⚠️ API error: {str(e)[:100]}"

    return reply.strip()

# ✅ POST endpoint for API usage
@app.post("/chat")
async def chat_api(request: Request):
    body = await request.json()
    message = body.get("message", "").strip()
    history = body.get("history", [])

    if not message:
        return JSONResponse(content={"response": "⚠️ Please enter a valid message."})

    response = chat_fn(message, history)
    return JSONResponse(content={"response": response})

# ✅ Gradio interface
demo = gr.ChatInterface(
    fn=chat_fn,
    title="💬 CODEX MIRXA KAMRAN",
    description="Chat with our AI assistant based on internal knowledge. Short, natural and helpful answers!",
    theme="soft"
)

app = gr.mount_gradio_app(app, demo, path="/")

# ✅ Local run
if __name__ == "__main__":
    nest_asyncio.apply()  # Allows uvicorn to run inside an already-running event loop (e.g. notebooks)
    uvicorn.run(app, host="0.0.0.0", port=7860)
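
# ✅ Example client call (illustrative sketch): with the server running locally on
# port 7860, the /chat endpoint can be exercised as below. The sample question is
# hypothetical; "history" mirrors the (user, assistant) pair format chat_fn expects.
#
#   import requests
#   resp = requests.post(
#       "http://localhost:7860/chat",
#       json={"message": "What services do you offer?", "history": []},
#       timeout=30,
#   )
#   print(resp.json()["response"])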