# app.py
# Hugging Face Space: Gradio app that chats about the Gradio docs via the Gradio Docs MCP server.
# - Uses huggingface_hub.MCPClient as an MCP client
# - Streams responses while the model calls MCP tools under the hood
# - Make sure to set HUGGING_FACE_HUB_TOKEN (or HF_TOKEN) in the Space secrets.
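#
# Assumed dependencies (a sketch; the original does not pin versions):
# MCP support ships as an optional extra of huggingface_hub, so a
# requirements.txt along these lines should work:
#   gradio
#   huggingface_hub[mcp]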

import os
import asyncio
from typing import Any, AsyncIterator, Dict, List, Optional

import gradio as gr
from huggingface_hub import MCPClient

# ----------------------------
# Configuration
# ----------------------------
GRADIO_DOCS_MCP_SSE = os.environ.get(
    "GRADIO_DOCS_MCP_SSE",
    "https://gradio-docs-mcp.hf.space/gradio_api/mcp/sse",
)

# Choose a tool-capable chat model you have access to
MODEL_ID = os.environ.get("CHAT_MODEL", "meta-llama/Meta-Llama-3.1-8B-Instruct")
PROVIDER = os.environ.get("CHAT_PROVIDER", "hf-inference")
HF_TOKEN = os.environ.get("HUGGING_FACE_HUB_TOKEN") or os.environ.get("HF_TOKEN")

SYSTEM_PROMPT = (
    "You are a helpful assistant that answers questions strictly using the Gradio documentation "
    "via the MCP tools provided by the Gradio Docs MCP server. Prefer the latest docs. "
    "When helpful, cite classes/functions (e.g., gr.Interface) and include short code examples."
)
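
# Example overrides (illustrative values; any tool-capable chat model your
# token can access should work):
#   CHAT_MODEL=Qwen/Qwen2.5-72B-Instruct CHAT_PROVIDER=hf-inference python app.py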

# ----------------------------
# MCP Client (lazy init)
# ----------------------------
mcp_client: Optional[MCPClient] = None
_initialized = False
_init_lock = asyncio.Lock()


def get_mcp_client() -> MCPClient:
    global mcp_client
    if mcp_client is None:
        # Pass api_key so provider calls are authenticated (avoids a 401)
        mcp_client = MCPClient(model=MODEL_ID, provider=PROVIDER, api_key=HF_TOKEN)
    return mcp_client

async def ensure_init() -> None:
    """
    Lazily attach the Gradio Docs MCP server. add_mcp_server is a coroutine in
    recent huggingface_hub versions, so it is awaited here, and the lock plus
    flag guarantee the server is attached exactly once.
    """
    global _initialized
    if _initialized:
        return
    async with _init_lock:
        if _initialized:
            return
        client = get_mcp_client()
        # Await the coroutine to avoid a "was never awaited" warning
        await client.add_mcp_server(
            type="sse",
            url=GRADIO_DOCS_MCP_SSE,
            timeout=30,
            # headers={"Authorization": f"Bearer {HF_TOKEN}"}  # not needed for the public server
        )
        _initialized = True
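

# Optional teardown sketch. This assumes MCPClient exposes an async cleanup()
# (present in recent huggingface_hub versions; verify against the one you run).
async def shutdown() -> None:
    global mcp_client, _initialized
    if mcp_client is not None:
        await mcp_client.cleanup()  # close MCP server connections
        mcp_client = None
        _initialized = False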

# ----------------------------
# Chat plumbing
# ----------------------------
def to_messages(history: List[List[str]], user_msg: str) -> List[Dict[str, Any]]:
    """
    Convert pair-style gr.Chatbot history into an LLM-style message list.
    """
    messages: List[Dict[str, Any]] = [{"role": "system", "content": SYSTEM_PROMPT}]
    for u, a in history:
        if u:
            messages.append({"role": "user", "content": str(u)})
        if a:
            messages.append({"role": "assistant", "content": str(a)})
    messages.append({"role": "user", "content": user_msg})
    return messages
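
# Illustrative round-trip (made-up values):
#   to_messages([["Hi", "Hello!"]], "What does gr.Blocks do?")
# returns:
#   [{"role": "system", "content": SYSTEM_PROMPT},
#    {"role": "user", "content": "Hi"},
#    {"role": "assistant", "content": "Hello!"},
#    {"role": "user", "content": "What does gr.Blocks do?"}]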

async def stream_answer(messages: List[Dict[str, Any]]) -> AsyncIterator[str]:
    """
    Stream text deltas and tool logs from MCPClient.process_single_turn_with_tools.
    Yields incremental pieces; callers accumulate them into the full reply.
    """
    # Helpful pre-flight check before touching the network
    if not HF_TOKEN:
        yield (
            "⚠️ Missing token: set `HUGGING_FACE_HUB_TOKEN` (or `HF_TOKEN`) "
            "in your Space **Settings → Secrets** to authenticate the model call."
        )
        return
    await ensure_init()
    client = get_mcp_client()
    try:
        async for chunk in client.process_single_turn_with_tools(messages):
            # chunk is expected to be a dict describing a text delta or tool activity
            if isinstance(chunk, dict):
                ctype = chunk.get("type")
                if ctype == "tool_log":
                    name = chunk.get("tool", "tool")
                    status = chunk.get("status", "")
                    yield f"\n\n_(using **{name}** {status})_"
                elif ctype == "text_delta":
                    yield chunk.get("delta", "")
                elif ctype == "text":
                    yield chunk.get("text", "")
                elif ctype == "tool_result":
                    content = chunk.get("content")
                    if isinstance(content, str) and content.strip():
                        yield f"\n\n**Result:**\n{content}"
            else:
                # Fallback if the provider yields plain strings or other objects
                yield str(chunk)
    except Exception as e:
        # Surface common errors nicely
        msg = str(e)
        if "401" in msg or "Unauthorized" in msg:
            yield (
                "❌ Unauthorized (401). Your model call was rejected.\n\n"
                "- Ensure the Space secret `HUGGING_FACE_HUB_TOKEN` is set and valid.\n"
                "- Confirm the selected `CHAT_MODEL` is accessible with your token.\n"
                "- If using `hf-inference`, verify your org access/limits."
            )
        else:
            yield f"❌ Error: {msg}"

async def respond(user_msg: str, history: List[List[str]]):
    messages = to_messages(history, user_msg)
    async for piece in stream_answer(messages):
        yield piece
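
# Illustrative consumption (mirrors on_submit below): respond() yields
# incremental deltas, so callers accumulate them into the full reply:
#   text = ""
#   async for piece in respond("How do I stream with gr.Chatbot?", []):
#       text += piece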

# ----------------------------
# Gradio UI
# ----------------------------
with gr.Blocks(fill_height=True) as demo:
    gr.Markdown(
        "# 🤖 Gradio Docs Chat (MCP Client)\n"
        "Ask anything about **Gradio**. Answers are grounded in the official docs via MCP."
    )
    chat = gr.Chatbot(
        label="Gradio Docs Assistant",
        height=520,
        type="tuples",  # pair-style history [[user, assistant], ...], matching the handlers below
        avatar_images=(None, None),
        likeable=True,
    )
    with gr.Row():
        msg = gr.Textbox(
            placeholder="e.g., How do I use gr.Interface with multiple inputs?",
            scale=9,
            autofocus=True,
        )
        send_btn = gr.Button("Send", scale=1, variant="primary")
    with gr.Row():
        clear = gr.ClearButton(components=[chat, msg], value="Clear")
        info = gr.Markdown(
            f"**Model:** `{MODEL_ID}` · **Provider:** `{PROVIDER}` · "
            f"**MCP:** Gradio Docs SSE",
            elem_classes=["text-sm", "opacity-70"],
        )

    async def on_submit(user_msg: str, history: List[List[str]]):
        # Append a placeholder assistant message to stream into
        history = (history or []) + [[user_msg, ""]]
        # Accumulate streamed deltas into the last assistant message
        async for chunk in respond(user_msg, history[:-1]):
            history[-1][1] += chunk
            yield history

    # Wire both Enter and the button click
    msg.submit(on_submit, inputs=[msg, chat], outputs=chat, queue=True)
    send_btn.click(on_submit, inputs=[msg, chat], outputs=chat, queue=True)

# For local dev; on Spaces, the Gradio runtime launches the app automatically.
if __name__ == "__main__":
    demo.launch()
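
# Local smoke test (the token value is a placeholder):
#   HUGGING_FACE_HUB_TOKEN=hf_xxx python app.py
# then open http://127.0.0.1:7860 (Gradio's default port).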