# app.py
# Hugging Face Space: Gradio app that chats about Gradio docs via the Gradio Docs MCP server.
# Features:
#   • MCP tool-calling (connects to the official Gradio Docs MCP SSE server)
#   • Streaming responses with live tool logs
#   • Optional "Concise / Detailed" answer style
#   • Lightweight citations panel (summarizes MCP tool hits)
#
# Requirements:
#   - gradio
#   - huggingface_hub
#
# Space secrets needed:
#   - HUGGING_FACE_HUB_TOKEN or HF_TOKEN (token must allow using the selected provider/model)
#
# Notes:
#   - The default model/provider below are known to work with chat + tool calling via the HF router.
#   - You can override them via Space Secrets: CHAT_MODEL, CHAT_PROVIDER.
import os
import asyncio
from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple

import gradio as gr
from huggingface_hub import MCPClient

# ----------------------------
# Configuration
# ----------------------------
GRADIO_DOCS_MCP_SSE = os.environ.get(
    "GRADIO_DOCS_MCP_SSE",
    "https://gradio-docs-mcp.hf.space/gradio_api/mcp/sse",
)
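# Example (hypothetical URL): to point the app at a different or self-hosted
# docs MCP server, set the env var / Space Secret before launch:
#   GRADIO_DOCS_MCP_SSE="https://my-docs-mcp.hf.space/gradio_api/mcp/sse"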
# Use a router model that supports OpenAI-style chat + tool calling.
# You can override these with Space Secrets if you like.
MODEL_ID = os.environ.get("CHAT_MODEL", "Qwen/Qwen2.5-7B-Instruct")
PROVIDER = os.environ.get("CHAT_PROVIDER", "hf-inference")  # router
HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")

BASE_SYSTEM_PROMPT = (
    "You are a helpful assistant that answers strictly using the Gradio documentation "
    "via the MCP tools provided by the Gradio Docs MCP server. Prefer the latest docs. "
    "Cite relevant class/function names (e.g., gr.Interface) and include short code examples when helpful."
)
CONCISE_SUFFIX = " Keep answers concise (3–6 sentences) unless code is necessary."
DETAILED_SUFFIX = " Provide a detailed, step-by-step answer with short code where helpful."
# ----------------------------
# MCP Client (lazy init, reconfigurable)
# ----------------------------
mcp_client: Optional[MCPClient] = None
_initialized = False
_init_lock = asyncio.Lock()


def _current_system_prompt(style: str) -> str:
    if style == "Concise":
        return BASE_SYSTEM_PROMPT + CONCISE_SUFFIX
    return BASE_SYSTEM_PROMPT + DETAILED_SUFFIX

def _reset_client():
    """Reset the global client so a new one is created with updated env (if any)."""
    global mcp_client, _initialized
    mcp_client = None
    _initialized = False


def get_mcp_client(model_id: str, provider: str, api_key: Optional[str]) -> MCPClient:
    global mcp_client
    if mcp_client is None:
        mcp_client = MCPClient(model=model_id, provider=provider, api_key=api_key)
    return mcp_client

async def ensure_init(model_id: str, provider: str, api_key: Optional[str]):
    """Attach the Gradio Docs MCP server exactly once."""
    global _initialized
    if _initialized:
        return
    async with _init_lock:
        if _initialized:
            return
        client = get_mcp_client(model_id, provider, api_key)
        # add_mcp_server may be a coroutine; await to avoid warnings
        await client.add_mcp_server(
            type="sse",
            url=GRADIO_DOCS_MCP_SSE,
            timeout=45,
        )
        _initialized = True
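
# Sketch of a runtime reconfiguration helper (hypothetical; not wired into the
# UI). Swapping models at runtime would mean resetting the cached client and
# re-attaching the MCP server:
#
#   async def reconfigure(model_id: str, provider: str) -> None:
#       _reset_client()
#       await ensure_init(model_id, provider, HF_TOKEN)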

# ----------------------------
# Message helpers
# ----------------------------
def to_llm_messages(history_msgs: List[Dict[str, Any]], user_msg: str, style: str) -> List[Dict[str, Any]]:
    """
    Convert the Chatbot messages list (role/content dicts) to the LLM format,
    with a system message prepended and the new user message appended.
    """
    msgs: List[Dict[str, Any]] = [{"role": "system", "content": _current_system_prompt(style)}]
    for m in history_msgs or []:
        role = m.get("role")
        content = m.get("content")
        if role in ("user", "assistant") and isinstance(content, str):
            msgs.append({"role": role, "content": content})
    msgs.append({"role": "user", "content": user_msg})
    return msgs
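
# Illustrative input/output for to_llm_messages (chat contents hypothetical):
#   to_llm_messages(
#       [{"role": "user", "content": "Hi"},
#        {"role": "assistant", "content": "Hello!"}],
#       "What does gr.Blocks do?",
#       "Concise",
#   )
#   -> [{"role": "system", "content": BASE_SYSTEM_PROMPT + CONCISE_SUFFIX},
#       {"role": "user", "content": "Hi"},
#       {"role": "assistant", "content": "Hello!"},
#       {"role": "user", "content": "What does gr.Blocks do?"}]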

# ----------------------------
# Streaming + side panels (tool logs & citations)
# ----------------------------
def _append_log(log_lines: List[str], line: str, max_lines: int = 200) -> None:
    log_lines.append(line)
    if len(log_lines) > max_lines:
        del log_lines[: len(log_lines) - max_lines]


def _format_tool_log(log_lines: List[str]) -> str:
    if not log_lines:
        return "_No tool activity yet._"
    return "\n".join(log_lines)


def _format_citations(cites: List[Tuple[str, Optional[str]]]) -> str:
    if not cites:
        return "_No citations captured yet._"
    # Show up to the last 12 citations (tool name + optional URL/source)
    last = cites[-12:]
    lines = []
    for (label, url) in last:
        if url:
            lines.append(f"- **{label}** → {url}")
        else:
            lines.append(f"- **{label}**")
    return "\n".join(lines)

async def stream_answer(
    messages_for_llm: List[Dict[str, Any]],
    model_id: str,
    provider: str,
    api_key: Optional[str],
) -> AsyncGenerator[Dict[str, Any], None]:
    """
    Stream deltas and tool logs from MCPClient.process_single_turn_with_tools.
    Yields dicts with partial text plus updated tool logs & citations so the UI
    can update all three panels.
    """
    await ensure_init(model_id, provider, api_key)
    client = get_mcp_client(model_id, provider, api_key)
    tool_log: List[str] = []
    citations: List[Tuple[str, Optional[str]]] = []  # (label, url)

    # Early token check
    if not api_key:
        yield {
            "delta": (
                "⚠️ Missing token: set `HUGGING_FACE_HUB_TOKEN` or `HF_TOKEN` "
                "in your Space **Settings → Secrets** to authenticate model calls."
            ),
            "tool_log": _format_tool_log(tool_log),
            "citations": _format_citations(citations),
        }
        return
    try:
        async for chunk in client.process_single_turn_with_tools(messages_for_llm):
            if isinstance(chunk, dict):
                ctype = chunk.get("type")
                if ctype == "tool_log":
                    # Example: {"type": "tool_log", "tool": "gradio_docs_mcp_search_gradio_docs", "status": "started/finished"}
                    name = chunk.get("tool", "tool")
                    status = chunk.get("status", "")
                    _append_log(tool_log, f"- {name} **{status}**")
                    yield {"delta": "", "tool_log": _format_tool_log(tool_log), "citations": _format_citations(citations)}
                elif ctype == "text_delta":
                    yield {"delta": chunk.get("delta", ""), "tool_log": _format_tool_log(tool_log), "citations": _format_citations(citations)}
                elif ctype == "text":
                    yield {"delta": chunk.get("text", ""), "tool_log": _format_tool_log(tool_log), "citations": _format_citations(citations)}
                elif ctype == "tool_result":
                    # Try to capture a useful citation label and optional URL if present
                    tool_name = chunk.get("tool", "tool")
                    content = chunk.get("content")
                    url = None
                    if isinstance(content, dict):
                        # Some servers return {"url": "...", "title": "...", ...}
                        url = content.get("url") or content.get("link")
                        title = content.get("title") or content.get("name")
                        label = title or tool_name
                    elif isinstance(content, str):
                        # Heuristic: look for a URL-ish substring
                        label = tool_name
                        # Very light heuristic (not a strict URL parse)
                        if "http://" in content or "https://" in content:
                            start = content.find("http")
                            # Cut a short preview
                            url = content[start : start + 200].split("\n")[0].strip()
                    else:
                        label = tool_name
                    citations.append((label, url))
                    _append_log(tool_log, f"  • {tool_name} returned result")
                    # Also echo a short "Result:" block into the chat for transparency (truncated)
                    snippet = ""
                    if isinstance(content, str):
                        snippet = content.strip()
                        if len(snippet) > 700:
                            snippet = snippet[:700] + "…"
                        snippet = f"\n\n**Result (from {tool_name}):**\n{snippet}"
                    yield {
                        "delta": snippet,
                        "tool_log": _format_tool_log(tool_log),
                        "citations": _format_citations(citations),
                    }
            else:
                # Fallback if the provider yields plain strings
                yield {"delta": str(chunk), "tool_log": _format_tool_log(tool_log), "citations": _format_citations(citations)}
    except Exception as e:
        msg = str(e)
        if "401" in msg or "Unauthorized" in msg:
            err = (
                "❌ Unauthorized (401). Ensure your token is set and permitted for the chosen model/provider.\n"
                f"- Model: `{model_id}`\n- Provider: `{provider}`\n"
            )
        else:
            err = f"❌ Error: {msg}"
        yield {"delta": err, "tool_log": _format_tool_log(tool_log), "citations": _format_citations(citations)}

# ----------------------------
# Gradio UI
# ----------------------------
with gr.Blocks(fill_height=True) as demo:
    gr.Markdown(
        "# 🤖 Gradio Docs Chat (MCP Client)\n"
        "Ask anything about **Gradio**. Answers are grounded in the official docs via MCP tools."
    )
    with gr.Row():
        with gr.Column(scale=7):
            chat = gr.Chatbot(
                label="Gradio Docs Assistant",
                height=520,
                type="messages",  # expects: [{"role": "...", "content": "..."}]
            )
            with gr.Row():
                msg = gr.Textbox(
                    placeholder="e.g., How do I use gr.Interface with multiple inputs?",
                    scale=9,
                    autofocus=True,
                )
                send_btn = gr.Button("Send", scale=1, variant="primary")
            clear = gr.ClearButton(components=[chat, msg], value="Clear")
        with gr.Column(scale=5):
            with gr.Accordion("⚙️ Settings", open=False):
                style = gr.Radio(
                    label="Answer Style",
                    choices=["Concise", "Detailed"],
                    value="Detailed",
                )
                model_read = gr.Markdown(
                    f"**Model:** `{MODEL_ID}`  \n**Provider:** `{PROVIDER}`  \n"
                    "_(override via Space Secrets: `CHAT_MODEL`, `CHAT_PROVIDER`)_"
                )
            with gr.Accordion("🛠️ Tool Activity (live)", open=True):
                tool_log_md = gr.Markdown("_No tool activity yet._")
            with gr.Accordion("📚 Citations (recent)", open=True):
                citations_md = gr.Markdown("_No citations captured yet._")

    async def on_submit(user_msg: str, history_msgs: List[Dict[str, Any]], style_choice: str):
        """
        Append the user's message, then stream the assistant reply while updating:
        - chat text
        - tool activity
        - citations
        """
        # Start a new assistant message for streaming
        history_msgs = (history_msgs or []) + [{"role": "user", "content": user_msg}]
        history_msgs.append({"role": "assistant", "content": ""})
        yield history_msgs, gr.update(value="_No tool activity yet._"), gr.update(value="_No citations captured yet._")

        # Compose messages for the LLM. Exclude the just-added user message and
        # assistant placeholder: to_llm_messages re-appends the user message itself.
        messages_for_llm = to_llm_messages(history_msgs[:-2], user_msg, style_choice)
        async for chunk in stream_answer(messages_for_llm, MODEL_ID, PROVIDER, HF_TOKEN):
            delta = chunk.get("delta", "")
            if delta:
                history_msgs[-1]["content"] += delta
            yield history_msgs, gr.update(value=chunk.get("tool_log", "")), gr.update(value=chunk.get("citations", ""))

    # Wire both Enter and the Send button; also pass the "style" selection
    msg.submit(on_submit, inputs=[msg, chat, style], outputs=[chat, tool_log_md, citations_md], queue=True)
    send_btn.click(on_submit, inputs=[msg, chat, style], outputs=[chat, tool_log_md, citations_md], queue=True)

# For local dev; on Spaces, the runtime launches the app automatically.
if __name__ == "__main__":
    demo.launch()