# app.py
# Hugging Face Space: Gradio app that chats about Gradio docs via the Gradio Docs MCP server.
# Features:
# • MCP tool-calling (connects to the official Gradio Docs MCP SSE server)
# • Streaming responses with live tool logs
# • Optional "Concise / Detailed" answer style
# • Lightweight citations panel (summarizes MCP tool hits)
#
# Requirements (in requirements.txt):
# gradio>=4.44.0
# huggingface_hub>=0.24.0
# spaces # only needed if your Space hardware is ZeroGPU
#
# Space secrets needed:
# - HUGGING_FACE_HUB_TOKEN or HF_TOKEN (token must allow using the selected provider/model)
#
# Notes:
# - The default model/provider below are known to work with chat + tool calling via the HF router.
# - You can override via Space Secrets: CHAT_MODEL, CHAT_PROVIDER, GRADIO_DOCS_MCP_SSE.
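#
# Example (illustrative) Space Secrets: the variable names come from this file, and the
# values shown are simply this file's defaults, so adjust them to your setup:
#   CHAT_MODEL=meta-llama/Meta-Llama-3.1-8B-Instruct
#   CHAT_PROVIDER=hf-inference
#   GRADIO_DOCS_MCP_SSE=https://gradio-docs-mcp.hf.space/gradio_api/mcp/sse
#   HF_TOKEN=<your Hugging Face token>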
import os
import asyncio
import inspect
from typing import Any, AsyncIterator, Dict, List, Optional, Tuple
import gradio as gr
from huggingface_hub import MCPClient
# -------------------------------------------------
# Optional ZeroGPU shim (silences "No @spaces.GPU..." startup message)
# -------------------------------------------------
SPACES_ZERO_GPU = bool(os.environ.get("SPACE_ZERO_GPU", "")) # HF sets this in ZeroGPU envs
try:
    import spaces  # type: ignore
except Exception:
    spaces = None  # not installed on CPU Spaces; harmless
if spaces is not None:
    @spaces.GPU  # this just lets ZeroGPU detect a GPU-capable function at startup
    def _zero_gpu_probe():
        # We don't actually need a GPU for this app (remote inference),
        # but the decorated function's presence satisfies ZeroGPU's startup check.
        return "ok"
# ----------------------------
# Configuration
# ----------------------------
GRADIO_DOCS_MCP_SSE = os.environ.get(
    "GRADIO_DOCS_MCP_SSE",
    "https://gradio-docs-mcp.hf.space/gradio_api/mcp/sse",
)
# Use a router model that supports OpenAI-style chat + tool calling.
MODEL_ID = os.environ.get("CHAT_MODEL", "meta-llama/Meta-Llama-3.1-8B-Instruct")
PROVIDER = os.environ.get("CHAT_PROVIDER", "hf-inference") # router
HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")
BASE_SYSTEM_PROMPT = (
    "You are a helpful assistant that answers strictly using the Gradio documentation "
    "via the MCP tools provided by the Gradio Docs MCP server. Prefer the latest docs. "
    "Cite relevant class/function names (e.g., gr.Interface) and include short code examples when helpful."
)
CONCISE_SUFFIX = " Keep answers concise (3–6 sentences) unless code is necessary."
DETAILED_SUFFIX = " Provide a detailed, step-by-step answer with short code where helpful."
# ----------------------------
# MCP Client (lazy init, reconfigurable)
# ----------------------------
mcp_client: Optional[MCPClient] = None
_initialized = False
_init_lock = asyncio.Lock()
def _current_system_prompt(style: str) -> str:
    return BASE_SYSTEM_PROMPT + (CONCISE_SUFFIX if style == "Concise" else DETAILED_SUFFIX)
def _reset_client():
    """Reset the global client so a new one is created with updated env (if any)."""
    global mcp_client, _initialized
    mcp_client = None
    _initialized = False
def get_mcp_client(model_id: str, provider: str, api_key: Optional[str]) -> MCPClient:
    global mcp_client
    if mcp_client is None:
        mcp_client = MCPClient(model=model_id, provider=provider, api_key=api_key)
    return mcp_client
async def _maybe_await(result):
    """Await result if it's an awaitable (handles old/new hub versions)."""
    if inspect.isawaitable(result):
        return await result
    return result
async def ensure_init(model_id: str, provider: str, api_key: Optional[str]):
    """Attach the Gradio Docs MCP server exactly once (supports async/sync add_mcp_server)."""
    global _initialized
    if _initialized:
        return
    async with _init_lock:
        if _initialized:
            return
        client = get_mcp_client(model_id, provider, api_key)
        # add_mcp_server may be coroutine or sync depending on version
        await _maybe_await(
            client.add_mcp_server(
                type="sse",
                url=GRADIO_DOCS_MCP_SSE,
                timeout=45,
            )
        )
        _initialized = True
# ----------------------------
# Message helpers
# ----------------------------
def to_llm_messages(history_msgs: List[Dict[str, Any]], user_msg: str, style: str) -> List[Dict[str, Any]]:
    """
    Convert Chatbot messages list (role/content dicts) to the LLM format,
    with a system message prepended and the new user message appended.
    """
    msgs: List[Dict[str, Any]] = [{"role": "system", "content": _current_system_prompt(style)}]
    for m in history_msgs or []:
        role = m.get("role")
        content = m.get("content")
        if role in ("user", "assistant") and isinstance(content, str):
            msgs.append({"role": role, "content": content})
    msgs.append({"role": "user", "content": user_msg})
    return msgs
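# Illustrative only: the shape to_llm_messages() produces for a two-turn history
# (the messages and style value below are made-up examples, not data from the app):
#   to_llm_messages(
#       [{"role": "user", "content": "hi"}, {"role": "assistant", "content": "hello"}],
#       "How do I add multiple inputs to gr.Interface?",
#       "Concise",
#   )
#   -> [
#        {"role": "system", "content": BASE_SYSTEM_PROMPT + CONCISE_SUFFIX},
#        {"role": "user", "content": "hi"},
#        {"role": "assistant", "content": "hello"},
#        {"role": "user", "content": "How do I add multiple inputs to gr.Interface?"},
#      ]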
# ----------------------------
# Streaming + side-panels (tool logs & citations)
# ----------------------------
def _append_log(log_lines: List[str], line: str, max_lines: int = 200) -> None:
    log_lines.append(line)
    if len(log_lines) > max_lines:
        del log_lines[: len(log_lines) - max_lines]
def _format_tool_log(log_lines: List[str]) -> str:
    if not log_lines:
        return "_No tool activity yet._"
    return "\n".join(log_lines)
def _format_citations(cites: List[Tuple[str, Optional[str]]]) -> str:
    if not cites:
        return "_No citations captured yet._"
    last = cites[-12:]  # show recent
    lines = []
    for (label, url) in last:
        if url:
            lines.append(f"- **{label}** — {url}")
        else:
            lines.append(f"- **{label}**")
    return "\n".join(lines)
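# Illustrative only: example output of _format_citations() for one captured hit
# (the label/URL pair is a made-up example, not a real tool result):
#   _format_citations([("gr.Interface", "https://www.gradio.app/docs")])
#   -> "- **gr.Interface** — https://www.gradio.app/docs"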
async def stream_answer(
    messages_for_llm: List[Dict[str, Any]],
    model_id: str,
    provider: str,
    api_key: Optional[str],
) -> AsyncIterator[Dict[str, Any]]:
"""
Stream deltas and tool logs from MCPClient.process_single_turn_with_tools.
Yield dicts with partial text plus updated tool logs & citations so the UI can update three panels.
"""
await ensure_init(model_id, provider, api_key)
client = get_mcp_client(model_id, provider, api_key)
tool_log: List[str] = []
citations: List[Tuple[str, Optional[str]]] = [] # (label, url)
# Early token check
if not api_key:
yield {
"delta": (
"⚠️ Missing token: set `HUGGING_FACE_HUB_TOKEN` or `HF_TOKEN` "
"in your Space **Settings β†’ Secrets** to authenticate model calls."
),
"tool_log": _format_tool_log(tool_log),
"citations": _format_citations(citations),
}
return
try:
async for chunk in client.process_single_turn_with_tools(messages_for_llm):
if isinstance(chunk, dict):
ctype = chunk.get("type")
if ctype == "tool_log":
# Example: {"type": "tool_log", "tool": "...", "status": "started/finished"}
name = chunk.get("tool", "tool")
status = chunk.get("status", "")
_append_log(tool_log, f"- {name} **{status}**")
yield {"delta": "", "tool_log": _format_tool_log(tool_log), "citations": _format_citations(citations)}
elif ctype == "text_delta":
yield {"delta": chunk.get("delta", ""), "tool_log": _format_tool_log(tool_log), "citations": _format_citations(citations)}
elif ctype == "text":
yield {"delta": chunk.get("text", ""), "tool_log": _format_tool_log(tool_log), "citations": _format_citations(citations)}
elif ctype == "tool_result":
# Capture a useful citation label and optional URL if present
tool_name = chunk.get("tool", "tool")
content = chunk.get("content")
url = None
if isinstance(content, dict):
url = content.get("url") or content.get("link")
title = content.get("title") or content.get("name")
label = title or tool_name
elif isinstance(content, str):
label = tool_name
if "http://" in content or "https://" in content:
start = content.find("http")
url = content[start : start + 200].split("\n")[0].strip()
else:
label = tool_name
citations.append((label, url))
_append_log(tool_log, f" β€’ {tool_name} returned result")
snippet = ""
if isinstance(content, str):
snippet = content.strip()
if len(snippet) > 700:
snippet = snippet[:700] + "…"
snippet = f"\n\n**Result (from {tool_name}):**\n{snippet}"
yield {
"delta": snippet,
"tool_log": _format_tool_log(tool_log),
"citations": _format_citations(citations),
}
else:
# Fallback if provider yields plain strings
yield {"delta": str(chunk), "tool_log": _format_tool_log(tool_log), "citations": _format_citations(citations)}
except Exception as e:
msg = str(e)
if "401" in msg or "Unauthorized" in msg:
err = (
"❌ Unauthorized (401). Ensure your token is set and permitted for the chosen model/provider.\n"
f"- Model: `{model_id}`\n- Provider: `{provider}`\n"
)
elif "404" in msg or "Not Found" in msg:
err = (
"❌ Model not found (404). The default model may not be available via hf-inference.\n"
"Consider setting `CHAT_MODEL` in your Space settings to a model that supports chat via the HF router."
)
else:
err = f"❌ Error: {msg}"
yield {"delta": err, "tool_log": _format_tool_log(tool_log), "citations": _format_citations(citations)}
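# Consumption sketch (illustrative; mirrors what on_submit does below):
#   async for chunk in stream_answer(messages_for_llm, MODEL_ID, PROVIDER, HF_TOKEN):
#       answer_text += chunk.get("delta", "")  # chunk also carries "tool_log" and "citations" markdown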
# ----------------------------
# Gradio UI
# ----------------------------
with gr.Blocks(fill_height=True) as demo:
    gr.Markdown(
        "# 🤖 Gradio Docs Chat (MCP Client)\n"
        "Ask anything about **Gradio**. Answers are grounded in the official docs via MCP tools."
    )
    with gr.Row():
        with gr.Column(scale=7):
            chat = gr.Chatbot(
                label="Gradio Docs Assistant",
                height=520,
                type="messages",  # expects: [{"role": "...", "content": "..."}]
            )
            with gr.Row():
                msg = gr.Textbox(
                    placeholder="e.g., How do I use gr.Interface with multiple inputs?",
                    scale=9,
                    autofocus=True,
                )
                send_btn = gr.Button("Send", scale=1, variant="primary")
            clear = gr.ClearButton(components=[chat, msg], value="Clear")
        with gr.Column(scale=5):
            with gr.Accordion("⚙️ Settings", open=False):
                style = gr.Radio(
                    label="Answer Style",
                    choices=["Concise", "Detailed"],
                    value="Detailed",
                )
                model_read = gr.Markdown(
                    f"**Model:** `{MODEL_ID}` \n**Provider:** `{PROVIDER}` \n"
                    "_(override via Space Secrets: `CHAT_MODEL`, `CHAT_PROVIDER`)_"
                )
            with gr.Accordion("🛠 Tool Activity (live)", open=True):
                tool_log_md = gr.Markdown("_No tool activity yet._")
            with gr.Accordion("📎 Citations (recent)", open=True):
                citations_md = gr.Markdown("_No citations captured yet._")
    async def on_submit(user_msg: str, history_msgs: List[Dict[str, Any]], style_choice: str):
        """
        Append the user's message, then stream the assistant reply while updating:
        - chat text
        - tool activity
        - citations
        """
        # Start a new assistant message for streaming
        history_msgs = (history_msgs or []) + [{"role": "user", "content": user_msg}]
        history_msgs.append({"role": "assistant", "content": ""})
        yield history_msgs, gr.update(value="_No tool activity yet._"), gr.update(value="_No citations captured yet._")
        # Compose messages for the LLM: drop the just-added user/assistant pair,
        # since to_llm_messages re-appends the new user turn itself.
        messages_for_llm = to_llm_messages(history_msgs[:-2], user_msg, style_choice)
        async for chunk in stream_answer(messages_for_llm, MODEL_ID, PROVIDER, HF_TOKEN):
            delta = chunk.get("delta", "")
            if delta:
                history_msgs[-1]["content"] += delta
            yield history_msgs, gr.update(value=chunk.get("tool_log", "")), gr.update(value=chunk.get("citations", ""))
    # Wire both Enter and button click; also pass "style"
    msg.submit(on_submit, inputs=[msg, chat, style], outputs=[chat, tool_log_md, citations_md], queue=True)
    send_btn.click(on_submit, inputs=[msg, chat, style], outputs=[chat, tool_log_md, citations_md], queue=True)
# ----------------------------
# Gradio runtime (queue + launch)
# ----------------------------
# IMPORTANT: assign the queued app back to 'demo' for older Gradio versions.
demo = demo.queue(max_size=32)
# Always launch; Spaces runs this script directly. Banner lines about "local URL" are normal.
demo.launch(
    ssr_mode=False
)
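# Local testing sketch (illustrative; on Spaces the script is run for you):
#   HF_TOKEN=<your token> python app.py   # then open the printed local URL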