from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse
import gradio as gr
import os
import re
import json
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
import requests
import nest_asyncio
import uvicorn

# ✅ Load documents and FAISS index
with open("texts.json", "r", encoding="utf-8") as f:
    texts = json.load(f)

index = faiss.read_index("faiss_index.bin")
embed_model = SentenceTransformer("all-MiniLM-L6-v2")

# ✅ API keys and config
API_KEY = os.environ.get("OPENROUTER_API_KEY")
MODEL = "nousresearch/deephermes-3-llama-3-8b-preview:free"  # Updated model

app = FastAPI()

# ✅ Greeting checker with added variations
def is_greeting(text):
    greetings = [
        "hi", "hello", "hey", "good morning", "good afternoon",
        "good evening", "howdy", "yo", "sup", "greetings",
        "what's up", "hey there"
    ]
    # Match whole words/phrases so e.g. "this" doesn't falsely trigger on "hi"
    text_lower = text.lower()
    return any(re.search(rf"\b{re.escape(g)}\b", text_lower) for g in greetings)

# ✅ Context fetcher
def get_context(query, top_k=5, threshold=0.3):
    query_vec = embed_model.encode([query])
    # Note: the threshold check assumes a similarity-scored index (e.g. IndexFlatIP
    # over normalized embeddings) where higher is better; for an L2-distance index
    # the comparison would need to be inverted.
    D, I = index.search(np.array(query_vec, dtype=np.float32), top_k)
    matches = [(texts[i], d) for i, d in zip(I[0], D[0]) if d >= threshold]
    if not matches:
        return ""
    return "\n".join(text for text, _ in matches)

# ✅ Core chat function
def chat_fn(message, history):
    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json"
    }

    context = get_context(message)

    # ✅ Handle greetings when no context is found
    if not context.strip():
        if is_greeting(message):
            return "👋 Hey there! How can I help you today?"
        return "🤔 Hmm, I'm not sure about that. Could you rephrase your question?"

    # ✅ Construct prompt with a friendly, natural tone
    messages = [
        {
            "role": "system",
            "content": (
                "You are a friendly, helpful assistant."
                " Use the following context to answer questions naturally and clearly."
                " Be conversational and concise. Avoid saying you're using context."
                " If unsure, say: 'I'm not sure about that. Could you rephrase?'"
                "\n\nContext:\n" + context
            )
        }
    ]

    # History arrives as (user, assistant) pairs from gr.ChatInterface
    for user, assistant in history:
        messages.append({"role": "user", "content": user})
        messages.append({"role": "assistant", "content": assistant})

    messages.append({"role": "user", "content": message + "\n\nKeep your reply short and natural."})

    payload = {
        "model": MODEL,
        "messages": messages,
        "max_tokens": 200,
    }

    try:
        response = requests.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=30
        )
        response.raise_for_status()
        reply = response.json()["choices"][0]["message"]["content"]
    except Exception as e:
        reply = f"⚠️ API error: {str(e)[:100]}"

    return reply.strip()

# ✅ POST endpoint for API usage
@app.post("/chat")
async def chat_api(request: Request):
    body = await request.json()
    message = body.get("message", "").strip()
    history = body.get("history", [])

    if not message:
        return JSONResponse(content={"response": "⚠️ Please enter a valid message."})

    response = chat_fn(message, history)
    return JSONResponse(content={"response": response})

# ✅ Gradio interface
demo = gr.ChatInterface(
    fn=chat_fn,
    title="💬 CODEX MIRXA KAMRAN",
    description="Chat with our AI assistant based on internal knowledge. Short, natural and helpful answers!",
    theme="soft"
)

app = gr.mount_gradio_app(app, demo, path="/")

# ✅ Local run
if __name__ == "__main__":
    nest_asyncio.apply()  # Allows uvicorn to run inside an already-running event loop (e.g. notebooks)
    uvicorn.run(app, host="0.0.0.0", port=7860)
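
# ✅ Example client call (illustrative sketch): with the server running locally on
# port 7860, the /chat endpoint can be exercised as below. The sample question is
# hypothetical; "history" mirrors the (user, assistant) pair format chat_fn expects.
#
#   import requests
#   resp = requests.post(
#       "http://localhost:7860/chat",
#       json={"message": "What services do you offer?", "history": []},
#       timeout=30,
#   )
#   print(resp.json()["response"])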