# app.py
# Hugging Face Space: Gradio app that chats about Gradio docs via the Gradio Docs MCP server.
# Features:
#   • MCP tool-calling (connects to the official Gradio Docs MCP SSE server)
#   • Streaming responses with live tool logs
#   • Optional "Concise / Detailed" answer style
#   • Lightweight citations panel (summarizes MCP tool hits)
#
# Requirements (in requirements.txt):
#   gradio>=4.44.0
#   huggingface_hub>=0.24.0
#   spaces  # only needed if your Space hardware is ZeroGPU
#
# Space secrets needed:
#   - HUGGING_FACE_HUB_TOKEN or HF_TOKEN (token must allow using the selected provider/model)
#
# Notes:
#   - The default model/provider below are known to work with chat + tool calling via the HF router.
#   - You can override via Space Secrets: CHAT_MODEL, CHAT_PROVIDER, GRADIO_DOCS_MCP_SSE.
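#
# Example overrides (illustrative values, not this app's defaults):
#   CHAT_MODEL=Qwen/Qwen2.5-72B-Instruct
#   CHAT_PROVIDER=together
#   GRADIO_DOCS_MCP_SSE=https://gradio-docs-mcp.hf.space/gradio_api/mcp/sse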
import os
import asyncio
import inspect
from typing import Any, AsyncIterator, Dict, List, Optional, Tuple

import gradio as gr
from huggingface_hub import MCPClient

# -------------------------------------------------
# Optional ZeroGPU shim (silences "No @spaces.GPU..." startup message)
# -------------------------------------------------
SPACES_ZERO_GPU = bool(os.environ.get("SPACES_ZERO_GPU", ""))  # HF sets this in ZeroGPU envs

try:
    import spaces  # type: ignore
except Exception:
    spaces = None  # not installed on CPU Spaces; harmless

if spaces is not None:
    # Decorating a no-op function lets ZeroGPU detect a GPU-capable function at startup.
    @spaces.GPU
    def _zero_gpu_probe():
        # We don't actually need a GPU for this app (remote inference),
        # but the decorated function's presence satisfies ZeroGPU's startup check.
        return "ok"

# ----------------------------
# Configuration
# ----------------------------
GRADIO_DOCS_MCP_SSE = os.environ.get(
    "GRADIO_DOCS_MCP_SSE",
    "https://gradio-docs-mcp.hf.space/gradio_api/mcp/sse",
)

# Use a router model that supports OpenAI-style chat + tool calling.
MODEL_ID = os.environ.get("CHAT_MODEL", "meta-llama/Meta-Llama-3.1-8B-Instruct")
PROVIDER = os.environ.get("CHAT_PROVIDER", "hf-inference")  # router
HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")

BASE_SYSTEM_PROMPT = (
    "You are a helpful assistant that answers strictly using the Gradio documentation "
    "via the MCP tools provided by the Gradio Docs MCP server. Prefer the latest docs. "
    "Cite relevant class/function names (e.g., gr.Interface) and include short code examples when helpful."
)
CONCISE_SUFFIX = " Keep answers concise (3-6 sentences) unless code is necessary."
DETAILED_SUFFIX = " Provide a detailed, step-by-step answer with short code where helpful."

# ----------------------------
# MCP Client (lazy init, reconfigurable)
# ----------------------------
mcp_client: Optional[MCPClient] = None
_initialized = False
_init_lock = asyncio.Lock()


def _current_system_prompt(style: str) -> str:
    return BASE_SYSTEM_PROMPT + (CONCISE_SUFFIX if style == "Concise" else DETAILED_SUFFIX)


def _reset_client():
    """Reset the global client so a new one is created with updated env (if any)."""
    global mcp_client, _initialized
    mcp_client = None
    _initialized = False


def get_mcp_client(model_id: str, provider: str, api_key: Optional[str]) -> MCPClient:
    global mcp_client
    if mcp_client is None:
        mcp_client = MCPClient(model=model_id, provider=provider, api_key=api_key)
    return mcp_client


async def _maybe_await(result):
    """Await result if it's an awaitable (handles old/new hub versions)."""
    if inspect.isawaitable(result):
        return await result
    return result


async def ensure_init(model_id: str, provider: str, api_key: Optional[str]):
    """Attach the Gradio Docs MCP server exactly once (supports async/sync add_mcp_server)."""
    global _initialized
    if _initialized:
        return
    async with _init_lock:
        if _initialized:
            return
        client = get_mcp_client(model_id, provider, api_key)
        # add_mcp_server may be a coroutine or sync depending on the huggingface_hub version
        await _maybe_await(
            client.add_mcp_server(
                type="sse",
                url=GRADIO_DOCS_MCP_SSE,
                timeout=45,
            )
        )
        _initialized = True

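# Note: call _reset_client() first if you need to re-attach with different env values;
# otherwise ensure_init() acts as a one-shot guard for the lifetime of the process.
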
# ----------------------------
# Message helpers
# ----------------------------
def to_llm_messages(history_msgs: List[Dict[str, Any]], user_msg: str, style: str) -> List[Dict[str, Any]]:
    """
    Convert the Chatbot messages list (role/content dicts) to the LLM format,
    with a system message prepended and the new user message appended.
    """
    msgs: List[Dict[str, Any]] = [{"role": "system", "content": _current_system_prompt(style)}]
    for m in history_msgs or []:
        role = m.get("role")
        content = m.get("content")
        if role in ("user", "assistant") and isinstance(content, str):
            msgs.append({"role": role, "content": content})
    msgs.append({"role": "user", "content": user_msg})
    return msgs

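# Illustrative shape of the conversion above (made-up values, not produced by this app):
#   to_llm_messages(
#       [{"role": "user", "content": "hi"}, {"role": "assistant", "content": "hello"}],
#       "How do I add examples to gr.Interface?",
#       "Concise",
#   )
#   -> [{"role": "system", "content": "<system prompt + concise suffix>"},
#       {"role": "user", "content": "hi"},
#       {"role": "assistant", "content": "hello"},
#       {"role": "user", "content": "How do I add examples to gr.Interface?"}]
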
# ----------------------------
# Streaming + side panels (tool logs & citations)
# ----------------------------
def _append_log(log_lines: List[str], line: str, max_lines: int = 200) -> None:
    log_lines.append(line)
    if len(log_lines) > max_lines:
        del log_lines[: len(log_lines) - max_lines]


def _format_tool_log(log_lines: List[str]) -> str:
    if not log_lines:
        return "_No tool activity yet._"
    return "\n".join(log_lines)


def _format_citations(cites: List[Tuple[str, Optional[str]]]) -> str:
    if not cites:
        return "_No citations captured yet._"
    last = cites[-12:]  # show recent
    lines = []
    for (label, url) in last:
        if url:
            lines.append(f"- **{label}** → {url}")
        else:
            lines.append(f"- **{label}**")
    return "\n".join(lines)

async def stream_answer(
    messages_for_llm: List[Dict[str, Any]],
    model_id: str,
    provider: str,
    api_key: Optional[str],
) -> AsyncIterator[Dict[str, Any]]:
    """
    Stream deltas and tool logs from MCPClient.process_single_turn_with_tools.
    Yield dicts with partial text plus updated tool logs & citations so the UI can update three panels.
    """
    await ensure_init(model_id, provider, api_key)
    client = get_mcp_client(model_id, provider, api_key)

    tool_log: List[str] = []
    citations: List[Tuple[str, Optional[str]]] = []  # (label, url)

    # Early token check
    if not api_key:
        yield {
            "delta": (
                "⚠️ Missing token: set `HUGGING_FACE_HUB_TOKEN` or `HF_TOKEN` "
                "in your Space **Settings → Secrets** to authenticate model calls."
            ),
            "tool_log": _format_tool_log(tool_log),
            "citations": _format_citations(citations),
        }
        return
    try:
        async for chunk in client.process_single_turn_with_tools(messages_for_llm):
            if isinstance(chunk, dict):
                ctype = chunk.get("type")

                if ctype == "tool_log":
                    # Example: {"type": "tool_log", "tool": "...", "status": "started/finished"}
                    name = chunk.get("tool", "tool")
                    status = chunk.get("status", "")
                    _append_log(tool_log, f"- {name} **{status}**")
                    yield {"delta": "", "tool_log": _format_tool_log(tool_log), "citations": _format_citations(citations)}

                elif ctype == "text_delta":
                    yield {"delta": chunk.get("delta", ""), "tool_log": _format_tool_log(tool_log), "citations": _format_citations(citations)}

                elif ctype == "text":
                    yield {"delta": chunk.get("text", ""), "tool_log": _format_tool_log(tool_log), "citations": _format_citations(citations)}

                elif ctype == "tool_result":
                    # Capture a useful citation label and optional URL if present
                    tool_name = chunk.get("tool", "tool")
                    content = chunk.get("content")
                    url = None
                    if isinstance(content, dict):
                        url = content.get("url") or content.get("link")
                        title = content.get("title") or content.get("name")
                        label = title or tool_name
                    elif isinstance(content, str):
                        label = tool_name
                        if "http://" in content or "https://" in content:
                            start = content.find("http")
                            url = content[start : start + 200].split("\n")[0].strip()
                    else:
                        label = tool_name
                    citations.append((label, url))
                    _append_log(tool_log, f"  • {tool_name} returned result")

                    snippet = ""
                    if isinstance(content, str):
                        snippet = content.strip()
                        if len(snippet) > 700:
                            snippet = snippet[:700] + "…"
                        snippet = f"\n\n**Result (from {tool_name}):**\n{snippet}"

                    yield {
                        "delta": snippet,
                        "tool_log": _format_tool_log(tool_log),
                        "citations": _format_citations(citations),
                    }
            else:
                # Fallback if the provider yields plain strings
                yield {"delta": str(chunk), "tool_log": _format_tool_log(tool_log), "citations": _format_citations(citations)}

    except Exception as e:
        msg = str(e)
        if "401" in msg or "Unauthorized" in msg:
            err = (
                "❌ Unauthorized (401). Ensure your token is set and permitted for the chosen model/provider.\n"
                f"- Model: `{model_id}`\n- Provider: `{provider}`\n"
            )
        elif "404" in msg or "Not Found" in msg:
            err = (
                "❌ Model not found (404). The default model may not be available via hf-inference.\n"
                "Consider setting `CHAT_MODEL` in your Space settings to a model that supports chat via the HF router."
            )
        else:
            err = f"❌ Error: {msg}"
        yield {"delta": err, "tool_log": _format_tool_log(tool_log), "citations": _format_citations(citations)}

# ----------------------------
# Gradio UI
# ----------------------------
with gr.Blocks(fill_height=True) as demo:
    gr.Markdown(
        "# 🤖 Gradio Docs Chat (MCP Client)\n"
        "Ask anything about **Gradio**. Answers are grounded in the official docs via MCP tools."
    )

    with gr.Row():
        with gr.Column(scale=7):
            chat = gr.Chatbot(
                label="Gradio Docs Assistant",
                height=520,
                type="messages",  # expects: [{"role": "...", "content": "..."}]
            )
            with gr.Row():
                msg = gr.Textbox(
                    placeholder="e.g., How do I use gr.Interface with multiple inputs?",
                    scale=9,
                    autofocus=True,
                )
                send_btn = gr.Button("Send", scale=1, variant="primary")
            clear = gr.ClearButton(components=[chat, msg], value="Clear")

        with gr.Column(scale=5):
            with gr.Accordion("⚙️ Settings", open=False):
                style = gr.Radio(
                    label="Answer Style",
                    choices=["Concise", "Detailed"],
                    value="Detailed",
                )
                model_read = gr.Markdown(
                    f"**Model:** `{MODEL_ID}`  \n**Provider:** `{PROVIDER}`  \n"
                    "_(override via Space Secrets: `CHAT_MODEL`, `CHAT_PROVIDER`)_"
                )
            with gr.Accordion("🔧 Tool Activity (live)", open=True):
                tool_log_md = gr.Markdown("_No tool activity yet._")
            with gr.Accordion("📚 Citations (recent)", open=True):
                citations_md = gr.Markdown("_No citations captured yet._")

    async def on_submit(user_msg: str, history_msgs: List[Dict[str, Any]], style_choice: str):
        """
        Append the user's message, then stream the assistant reply while updating:
          - chat text
          - tool activity
          - citations
        """
        # Start a new assistant message for streaming
        history_msgs = (history_msgs or []) + [{"role": "user", "content": user_msg}]
        history_msgs.append({"role": "assistant", "content": ""})
        yield history_msgs, gr.update(value="_No tool activity yet._"), gr.update(value="_No citations captured yet._")

        # Compose messages for the LLM. Exclude the just-added user turn and the empty
        # assistant placeholder; to_llm_messages re-appends the user message itself.
        messages_for_llm = to_llm_messages(history_msgs[:-2], user_msg, style_choice)

        async for chunk in stream_answer(messages_for_llm, MODEL_ID, PROVIDER, HF_TOKEN):
            delta = chunk.get("delta", "")
            if delta:
                history_msgs[-1]["content"] += delta
            yield history_msgs, gr.update(value=chunk.get("tool_log", "")), gr.update(value=chunk.get("citations", ""))

    # Wire both Enter and the Send button; also pass the "style" choice
    msg.submit(on_submit, inputs=[msg, chat, style], outputs=[chat, tool_log_md, citations_md], queue=True)
    send_btn.click(on_submit, inputs=[msg, chat, style], outputs=[chat, tool_log_md, citations_md], queue=True)

# ----------------------------
# Gradio runtime (queue + launch)
# ----------------------------
# IMPORTANT: assign the queued app back to 'demo' for older Gradio versions.
demo = demo.queue(max_size=32)

# Always launch; Spaces runs this script directly. Banner lines about the "local URL" are normal.
demo.launch(
    ssr_mode=False,
)
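
# Local testing sketch (assumption: dependencies installed and a valid HF token exported):
#   $ pip install "gradio>=4.44.0" "huggingface_hub>=0.24.0"
#   $ HF_TOKEN=hf_xxx python app.py
#   # then open the local URL Gradio prints (http://127.0.0.1:7860 by default)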