# app.py
# Hugging Face Space: Gradio app that chats about Gradio docs via the Gradio Docs MCP server.
# Requirements:
#   - gradio
#   - huggingface_hub
#
# Space secret needed:
#   - HUGGING_FACE_HUB_TOKEN or HF_TOKEN (must have access to provider="novita")

import os
import asyncio
from typing import Any, AsyncIterator, Dict, List, Optional

import gradio as gr
from huggingface_hub import MCPClient

# ----------------------------
# Configuration
# ----------------------------
GRADIO_DOCS_MCP_SSE = os.environ.get(
    "GRADIO_DOCS_MCP_SSE",
    "https://gradio-docs-mcp.hf.space/gradio_api/mcp/sse",
)

# Novita provider + the chosen model (both overridable via env vars)
MODEL_ID = os.environ.get("CHAT_MODEL", "openai/gpt-oss-20b")
PROVIDER = os.environ.get("CHAT_PROVIDER", "novita")  # <-- IMPORTANT

# Accept either env name
HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")

SYSTEM_PROMPT = (
    "You are a helpful assistant that answers questions strictly using the Gradio documentation "
    "via the MCP tools provided by the Gradio Docs MCP server. Prefer the latest docs. "
    "When helpful, cite classes/functions (e.g., gr.Interface) and include short code examples."
)

# ----------------------------
# MCP Client (lazy init)
# ----------------------------
mcp_client: Optional[MCPClient] = None
_initialized = False
_init_lock = asyncio.Lock()


def get_mcp_client() -> MCPClient:
    """
    Create a single global MCPClient configured to use provider='novita'
    and the given MODEL_ID. MCPClient wraps huggingface_hub's InferenceClient,
    so this behaves like a direct Novita chat-completion call.
    """
    global mcp_client
    if mcp_client is None:
        mcp_client = MCPClient(
            model=MODEL_ID,
            provider=PROVIDER,  # novita
            api_key=HF_TOKEN,   # token must permit novita access
        )
    return mcp_client


async def ensure_init():
    """
    Lazily attach the Gradio Docs MCP server. In some huggingface_hub versions
    add_mcp_server is async, so it must be awaited exactly once.
    """
    global _initialized
    if _initialized:
        return
    async with _init_lock:
        if _initialized:
            return
        client = get_mcp_client()
        # Await the coroutine to avoid "was never awaited" warnings
        await client.add_mcp_server(
            type="sse",
            url=GRADIO_DOCS_MCP_SSE,
            timeout=30,
        )
        _initialized = True
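# Note: the docstring above says add_mcp_server is async only in some
# huggingface_hub versions. If you need to tolerate both sync and async
# variants, one hedged sketch (not wired into ensure_init) is to detect
# an awaitable at runtime:
#
#     import inspect
#
#     result = client.add_mcp_server(type="sse", url=GRADIO_DOCS_MCP_SSE, timeout=30)
#     if inspect.isawaitable(result):
#         await result  # async variant returns a coroutine; sync variant returns None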
# ----------------------------
# Helpers for messages
# ----------------------------
def to_llm_messages(history_msgs: List[Dict[str, Any]], user_msg: str) -> List[Dict[str, Any]]:
    """
    Convert the Chatbot messages list (role/content dicts) to the LLM format,
    with a system message prepended and the new user message appended.
    """
    msgs: List[Dict[str, Any]] = [{"role": "system", "content": SYSTEM_PROMPT}]
    for m in history_msgs or []:
        role = m.get("role")
        content = m.get("content")
        if role in ("user", "assistant") and isinstance(content, str):
            msgs.append({"role": role, "content": content})
    msgs.append({"role": "user", "content": user_msg})
    return msgs


async def stream_answer(messages_for_llm: List[Dict[str, Any]]) -> AsyncIterator[str]:
    """
    Stream deltas and tool logs from MCPClient.process_single_turn_with_tools.
    """
    # Pre-flight check: fail fast before connecting to the MCP server
    if not HF_TOKEN:
        yield (
            "⚠️ Missing token: set `HF_TOKEN` (or `HUGGING_FACE_HUB_TOKEN`) "
            "in your Space **Settings → Secrets**. The token must allow provider='novita'."
        )
        return

    await ensure_init()
    client = get_mcp_client()

    try:
        async for chunk in client.process_single_turn_with_tools(messages_for_llm):
            if isinstance(chunk, dict):
                ctype = chunk.get("type")
                if ctype == "tool_log":
                    name = chunk.get("tool", "tool")
                    status = chunk.get("status", "")
                    yield f"\n\n_(using **{name}** {status})_"
                elif ctype == "text_delta":
                    yield chunk.get("delta", "")
                elif ctype == "text":
                    yield chunk.get("text", "")
                elif ctype == "tool_result":
                    content = chunk.get("content")
                    if isinstance(content, str) and content.strip():
                        yield f"\n\n**Result:**\n{content}"
            else:
                yield str(chunk)
    except Exception as e:
        msg = str(e)
        # Common failure modes
        if "401" in msg or "Unauthorized" in msg:
            yield (
                "❌ Unauthorized (401). Your model call was rejected by Novita.\n\n"
                "- Ensure the Space secret `HF_TOKEN` is set and valid.\n"
                "- Confirm `HF_TOKEN` has access to provider='novita' and the model.\n"
            )
        elif "400" in msg or "Bad Request" in msg:
            yield (
                "❌ Bad Request (400). The Novita endpoint rejected the request.\n\n"
                "- Double-check `CHAT_MODEL` (currently "
                f"`{MODEL_ID}`) is valid for provider='novita'.\n"
                "- Ensure your `HF_TOKEN` has the necessary permissions.\n"
            )
        else:
            yield f"❌ Error: {msg}"


# ----------------------------
# Gradio UI
# ----------------------------
with gr.Blocks(fill_height=True) as demo:
    gr.Markdown(
        "# 🤖 Gradio Docs Chat (MCP Client via Novita)\n"
        "Ask anything about **Gradio**. Answers are grounded in the official docs via MCP."
    )

    chat = gr.Chatbot(
        label="Gradio Docs Assistant",
        height=520,
        type="messages",  # expects: [{"role": "...", "content": "..."}]
    )

    with gr.Row():
        msg = gr.Textbox(
            placeholder="e.g., How do I use gr.Interface with multiple inputs?",
            scale=9,
            autofocus=True,
        )
        send_btn = gr.Button("Send", scale=1, variant="primary")

    with gr.Row():
        clear = gr.ClearButton(components=[chat, msg], value="Clear")
        info = gr.Markdown(
            f"**Model:** `{MODEL_ID}` · **Provider:** `{PROVIDER}` · **MCP:** Gradio Docs SSE",
        )

    async def on_submit(user_msg: str, history_msgs: List[Dict[str, Any]]):
        """
        history_msgs is a list of {"role": ..., "content": ...} dicts.
        We append the user's message plus an empty assistant message, then
        stream the reply by growing that last assistant message's content.
        """
        history_msgs = (history_msgs or []) + [{"role": "user", "content": user_msg}]
        history_msgs.append({"role": "assistant", "content": ""})
        yield history_msgs

        # Exclude both the assistant placeholder and the new user message;
        # to_llm_messages re-appends user_msg, so slicing [:-1] would duplicate it.
        messages_for_llm = to_llm_messages(history_msgs[:-2], user_msg)

        async for delta in stream_answer(messages_for_llm):
            history_msgs[-1]["content"] += delta
            yield history_msgs

    msg.submit(on_submit, inputs=[msg, chat], outputs=chat, queue=True)
    send_btn.click(on_submit, inputs=[msg, chat], outputs=chat, queue=True)

if __name__ == "__main__":
    demo.launch()
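
# ----------------------------
# Optional: provider token check (sketch)
# ----------------------------
# Because MCPClient wraps huggingface_hub's InferenceClient, a quick way to
# verify that HF_TOKEN can actually reach provider="novita" is a direct chat
# call with no MCP involved. A minimal sketch, assuming the same model as the
# MODEL_ID default above (run it separately, not inside this Space):
#
#     from huggingface_hub import InferenceClient
#
#     client = InferenceClient(provider="novita", api_key=os.environ["HF_TOKEN"])
#     out = client.chat_completion(
#         model="openai/gpt-oss-20b",
#         messages=[{"role": "user", "content": "ping"}],
#         max_tokens=8,
#     )
#     print(out.choices[0].message.content)  # any reply means the token works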