Spaces:

sudipta26889
/

gradio-doc

Sleeping

File size: 12,052 Bytes

e1c5ae4
 
6d7a07a
 
 
 
 
 
d62a2bb
 
 
1c5b9ef
6d7a07a
 
 
 
 
d62a2bb
e1c5ae4
0102b23
e1c5ae4
6d7a07a
0102b23
 
 
 
e1c5ae4
 
 
 
 
 
 
0102b23
6d7a07a
 
 
51bd84d
0102b23
6d7a07a
 
e1c5ae4
6d7a07a
e1c5ae4
6d7a07a
 
 
e1c5ae4
6d7a07a
e1c5ae4
 
 
 
0102b23
6d7a07a
d62a2bb
0102b23
6d7a07a
 
 
 
 
 
 
e1c5ae4
 
6d7a07a
e1c5ae4
0102b23
6d7a07a
 
e1c5ae4
 
 
 
 
 
6d7a07a
 
e1c5ae4
 
 
6d7a07a
e1c5ae4
 
 
 
6d7a07a
e1c5ae4
6d7a07a
e1c5ae4
1c5b9ef
 
e1c5ae4
6d7a07a
51bd84d
1c5b9ef
 
 
 
 
 
 
6d7a07a
 
 
 
 
 
 
 
 
 
 
 
1c5b9ef
6d7a07a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0102b23
e1c5ae4
6d7a07a
0102b23
6d7a07a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e1c5ae4
 
 
1c5b9ef
e1c5ae4
 
6d7a07a
e1c5ae4
 
 
6d7a07a
 
 
e1c5ae4
6d7a07a
 
e1c5ae4
6d7a07a
 
e1c5ae4
6d7a07a
e1c5ae4
6d7a07a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e1c5ae4
6d7a07a
 
e1c5ae4
 
 
6d7a07a
 
 
e1c5ae4
0102b23
6d7a07a
 
0102b23
e1c5ae4
 
 
0102b23
e1c5ae4
6d7a07a
 
e1c5ae4
 
0102b23
6d7a07a
 
 
 
9ed206f
6d7a07a
 
 
 
 
 
 
 
0102b23
6d7a07a
e1c5ae4
6d7a07a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1c5b9ef
6d7a07a
 
 
 
1c5b9ef
 
 
6d7a07a
 
 
1c5b9ef
6d7a07a
 
 
 
 
0102b23
6d7a07a
 
0102b23
d62a2bb
 
 
9ed206f

# app.py
# Hugging Face Space: Gradio app that chats about Gradio docs via the Gradio Docs MCP server.
# Features:
#   • MCP tool-calling (connects to the official Gradio Docs MCP SSE server)
#   • Streaming responses with live tool logs
#   • Optional "Concise / Detailed" answer style
#   • Lightweight citations panel (summarizes MCP tool hits)
#
# Requirements (in requirements.txt):
#   gradio>=4.44.0
#   huggingface_hub>=0.24.0
#
# Space secrets needed:
#   - HUGGING_FACE_HUB_TOKEN or HF_TOKEN  (token must allow using the selected provider/model)
#
# Notes:
#   - The default model/provider below are known to work with chat + tool calling via the HF router.
#   - You can override via Space Secrets: CHAT_MODEL, CHAT_PROVIDER, GRADIO_DOCS_MCP_SSE.

import os
import asyncio
from typing import Any, Dict, Iterable, List, Optional, Tuple

import gradio as gr
from huggingface_hub import MCPClient

# ----------------------------
# Configuration
# ----------------------------
GRADIO_DOCS_MCP_SSE = os.environ.get(
    "GRADIO_DOCS_MCP_SSE",
    "https://gradio-docs-mcp.hf.space/gradio_api/mcp/sse",
)

# Use a router model that supports OpenAI-style chat + tool calling.
MODEL_ID = os.environ.get("CHAT_MODEL", "Qwen/Qwen2.5-7B-Instruct")
PROVIDER = os.environ.get("CHAT_PROVIDER", "hf-inference")  # router
HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")

BASE_SYSTEM_PROMPT = (
    "You are a helpful assistant that answers strictly using the Gradio documentation "
    "via the MCP tools provided by the Gradio Docs MCP server. Prefer the latest docs. "
    "Cite relevant class/function names (e.g., gr.Interface) and include short code examples when helpful."
)
CONCISE_SUFFIX = " Keep answers concise (3–6 sentences) unless code is necessary."
DETAILED_SUFFIX = " Provide a detailed, step-by-step answer with short code where helpful."

# ----------------------------
# MCP Client (lazy init, reconfigurable)
# ----------------------------
mcp_client: Optional[MCPClient] = None
_initialized = False
_init_lock = asyncio.Lock()

def _current_system_prompt(style: str) -> str:
    return BASE_SYSTEM_PROMPT + (CONCISE_SUFFIX if style == "Concise" else DETAILED_SUFFIX)

def _reset_client():
    """Reset the global client so a new one is created with updated env (if any)."""
    global mcp_client, _initialized
    mcp_client = None
    _initialized = False

def get_mcp_client(model_id: str, provider: str, api_key: Optional[str]) -> MCPClient:
    global mcp_client
    if mcp_client is None:
        mcp_client = MCPClient(model=model_id, provider=provider, api_key=api_key)
    return mcp_client

async def ensure_init(model_id: str, provider: str, api_key: Optional[str]):
    """Attach the Gradio Docs MCP server exactly once."""
    global _initialized
    if _initialized:
        return
    async with _init_lock:
        if _initialized:
            return
        client = get_mcp_client(model_id, provider, api_key)
        # add_mcp_server may be a coroutine; await to avoid warnings
        await client.add_mcp_server(
            type="sse",
            url=GRADIO_DOCS_MCP_SSE,
            timeout=45,
        )
        _initialized = True

# ----------------------------
# Message helpers
# ----------------------------
def to_llm_messages(history_msgs: List[Dict[str, Any]], user_msg: str, style: str) -> List[Dict[str, Any]]:
    """
    Convert Chatbot messages list (role/content dicts) to the LLM format,
    with a system message prepended and the new user message appended.
    """
    msgs: List[Dict[str, Any]] = [{"role": "system", "content": _current_system_prompt(style)}]
    for m in history_msgs or []:
        role = m.get("role")
        content = m.get("content")
        if role in ("user", "assistant") and isinstance(content, str):
            msgs.append({"role": role, "content": content})
    msgs.append({"role": "user", "content": user_msg})
    return msgs

# ----------------------------
# Streaming + side-panels (tool logs & citations)
# ----------------------------
def _append_log(log_lines: List[str], line: str, max_lines: int = 200) -> None:
    log_lines.append(line)
    if len(log_lines) > max_lines:
        del log_lines[: len(log_lines) - max_lines]

def _format_tool_log(log_lines: List[str]) -> str:
    if not log_lines:
        return "_No tool activity yet._"
    return "\n".join(log_lines)

def _format_citations(cites: List[Tuple[str, Optional[str]]]) -> str:
    if not cites:
        return "_No citations captured yet._"
    # Show up to the last 12 citations (tool name + optional URL/source)
    last = cites[-12:]
    lines = []
    for (label, url) in last:
        if url:
            lines.append(f"- **{label}** — {url}")
        else:
            lines.append(f"- **{label}**")
    return "\n".join(lines)

async def stream_answer(
    messages_for_llm: List[Dict[str, Any]],
    model_id: str,
    provider: str,
    api_key: Optional[str],
) -> Iterable[Dict[str, Any]]:
    """
    Stream deltas and tool logs from MCPClient.process_single_turn_with_tools.
    Yield dicts with partial text plus updated tool logs & citations so the UI can update three panels.
    """
    await ensure_init(model_id, provider, api_key)
    client = get_mcp_client(model_id, provider, api_key)

    tool_log: List[str] = []
    citations: List[Tuple[str, Optional[str]]] = []  # (label, url)

    # Early token check
    if not api_key:
        yield {
            "delta": (
                "⚠️ Missing token: set `HUGGING_FACE_HUB_TOKEN` or `HF_TOKEN` "
                "in your Space **Settings → Secrets** to authenticate model calls."
            ),
            "tool_log": _format_tool_log(tool_log),
            "citations": _format_citations(citations),
        }
        return

    try:
        async for chunk in client.process_single_turn_with_tools(messages_for_llm):
            if isinstance(chunk, dict):
                ctype = chunk.get("type")

                if ctype == "tool_log":
                    name = chunk.get("tool", "tool")
                    status = chunk.get("status", "")
                    _append_log(tool_log, f"- {name} **{status}**")
                    yield {"delta": "", "tool_log": _format_tool_log(tool_log), "citations": _format_citations(citations)}

                elif ctype == "text_delta":
                    yield {"delta": chunk.get("delta", ""), "tool_log": _format_tool_log(tool_log), "citations": _format_citations(citations)}

                elif ctype == "text":
                    yield {"delta": chunk.get("text", ""), "tool_log": _format_tool_log(tool_log), "citations": _format_citations(citations)}

                elif ctype == "tool_result":
                    tool_name = chunk.get("tool", "tool")
                    content = chunk.get("content")
                    url = None
                    if isinstance(content, dict):
                        url = content.get("url") or content.get("link")
                        title = content.get("title") or content.get("name")
                        label = title or tool_name
                    elif isinstance(content, str):
                        label = tool_name
                        if "http://" in content or "https://" in content:
                            start = content.find("http")
                            url = content[start : start + 200].split("\n")[0].strip()
                    else:
                        label = tool_name

                    citations.append((label, url))
                    _append_log(tool_log, f"  • {tool_name} returned result")
                    snippet = ""
                    if isinstance(content, str):
                        snippet = content.strip()
                        if len(snippet) > 700:
                            snippet = snippet[:700] + "…"
                        snippet = f"\n\n**Result (from {tool_name}):**\n{snippet}"
                    yield {
                        "delta": snippet,
                        "tool_log": _format_tool_log(tool_log),
                        "citations": _format_citations(citations),
                    }

            else:
                yield {"delta": str(chunk), "tool_log": _format_tool_log(tool_log), "citations": _format_citations(citations)}

    except Exception as e:
        msg = str(e)
        if "401" in msg or "Unauthorized" in msg:
            err = (
                "❌ Unauthorized (401). Ensure your token is set and permitted for the chosen model/provider.\n"
                f"- Model: `{model_id}`\n- Provider: `{provider}`\n"
            )
        else:
            err = f"❌ Error: {msg}"
        yield {"delta": err, "tool_log": _format_tool_log(tool_log), "citations": _format_citations(citations)}

# ----------------------------
# Gradio UI
# ----------------------------
with gr.Blocks(fill_height=True) as demo:
    gr.Markdown(
        "# 🤖 Gradio Docs Chat (MCP Client)\n"
        "Ask anything about **Gradio**. Answers are grounded in the official docs via MCP tools."
    )

    with gr.Row():
        with gr.Column(scale=7):
            chat = gr.Chatbot(
                label="Gradio Docs Assistant",
                height=520,
                type="messages",
            )
            with gr.Row():
                msg = gr.Textbox(
                    placeholder="e.g., How do I use gr.Interface with multiple inputs?",
                    scale=9,
                    autofocus=True,
                )
                send_btn = gr.Button("Send", scale=1, variant="primary")

            clear = gr.ClearButton(components=[chat, msg], value="Clear")

        with gr.Column(scale=5):
            with gr.Accordion("⚙️ Settings", open=False):
                style = gr.Radio(
                    label="Answer Style",
                    choices=["Concise", "Detailed"],
                    value="Detailed",
                )
                model_read = gr.Markdown(
                    f"**Model:** `{MODEL_ID}`  \n**Provider:** `{PROVIDER}`  \n"
                    "_(override via Space Secrets: `CHAT_MODEL`, `CHAT_PROVIDER`)_"
                )

            with gr.Accordion("🛠 Tool Activity (live)", open=True):
                tool_log_md = gr.Markdown("_No tool activity yet._")

            with gr.Accordion("📎 Citations (recent)", open=True):
                citations_md = gr.Markdown("_No citations captured yet._")

    async def on_submit(user_msg: str, history_msgs: List[Dict[str, Any]], style_choice: str):
        """
        Append the user's message, then stream the assistant reply while updating:
          - chat text
          - tool activity
          - citations
        """
        history_msgs = (history_msgs or []) + [{"role": "user", "content": user_msg}]
        history_msgs.append({"role": "assistant", "content": ""})
        yield history_msgs, gr.update(value="_No tool activity yet._"), gr.update(value="_No citations captured yet._")

        messages_for_llm = to_llm_messages(history_msgs[:-1], user_msg, style_choice)

        async for chunk in stream_answer(messages_for_llm, MODEL_ID, PROVIDER, HF_TOKEN):
            delta = chunk.get("delta", "")
            if delta:
                history_msgs[-1]["content"] += delta
            yield history_msgs, gr.update(value=chunk.get("tool_log", "")), gr.update(value=chunk.get("citations", ""))

    msg.submit(on_submit, inputs=[msg, chat, style], outputs=[chat, tool_log_md, citations_md], queue=True)
    send_btn.click(on_submit, inputs=[msg, chat, style], outputs=[chat, tool_log_md, citations_md], queue=True)

# ----------------------------
# Gradio runtime (queue + launch)
# ----------------------------
# IMPORTANT: assign the queued app back to 'demo' for older Gradio versions.
demo = demo.queue(max_size=32)

# Always launch; Spaces runs this script directly.
demo.launch(
    ssr_mode=False,       # disable SSR to avoid Node helper churn
    server_name="0.0.0.0",
    server_port=7860,
)