# app.py
# Hugging Face Space: Gradio app that chats about Gradio docs via the Gradio Docs MCP server.
# Requirements:
# - gradio
# - huggingface_hub
#
# Space secret needed:
# - HUGGING_FACE_HUB_TOKEN (or HF_TOKEN)
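#
# Note: MCP support in huggingface_hub ships as an optional extra in recent
# releases; if MCPClient fails to import, a requirements.txt along these lines
# should work (exact pinning is an assumption, adjust to your environment):
#   gradio
#   huggingface_hub[mcp]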
import os
import asyncio
from typing import Any, AsyncIterator, Dict, List, Optional

import gradio as gr
from huggingface_hub import MCPClient
# ----------------------------
# Configuration
# ----------------------------
GRADIO_DOCS_MCP_SSE = os.environ.get(
    "GRADIO_DOCS_MCP_SSE",
    "https://gradio-docs-mcp.hf.space/gradio_api/mcp/sse",
)

# Choose a tool-capable chat model you have access to
MODEL_ID = os.environ.get("CHAT_MODEL", "meta-llama/Meta-Llama-3.1-8B-Instruct")
PROVIDER = os.environ.get("CHAT_PROVIDER", "hf-inference")
HF_TOKEN = os.environ.get("HUGGING_FACE_HUB_TOKEN") or os.environ.get("HF_TOKEN")

SYSTEM_PROMPT = (
    "You are a helpful assistant that answers questions strictly using the Gradio documentation "
    "via the MCP tools provided by the Gradio Docs MCP server. Prefer the latest docs. "
    "When helpful, cite classes/functions (e.g., gr.Interface) and include short code examples."
)
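
# For local development, the same settings can be exported before launching
# (values below are illustrative, not real credentials):
#   export HF_TOKEN=hf_xxx
#   export CHAT_MODEL=meta-llama/Meta-Llama-3.1-8B-Instruct
#   export CHAT_PROVIDER=hf-inference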
# ----------------------------
# MCP Client (lazy init)
# ----------------------------
mcp_client: Optional[MCPClient] = None
_initialized = False
_init_lock = asyncio.Lock()


def get_mcp_client() -> MCPClient:
    global mcp_client
    if mcp_client is None:
        # Pass api_key so provider calls are authenticated (fixes 401)
        mcp_client = MCPClient(model=MODEL_ID, provider=PROVIDER, api_key=HF_TOKEN)
    return mcp_client


async def ensure_init():
    """
    Lazily attach the Gradio Docs MCP server. In some huggingface_hub versions
    add_mcp_server is async, so it must be awaited exactly once.
    """
    global _initialized
    if _initialized:
        return
    async with _init_lock:
        if _initialized:
            return
        client = get_mcp_client()
        # Await the coroutine to avoid "was never awaited" warning
        await client.add_mcp_server(
            type="sse",
            url=GRADIO_DOCS_MCP_SSE,
            timeout=30,
            # headers={"Authorization": f"Bearer {HF_TOKEN}"}  # Not needed for public server
        )
        _initialized = True
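
# Optional teardown sketch: recent huggingface_hub versions expose an async
# cleanup() on MCPClient for closing server connections (check your installed
# version). Whether/where to call it depends on the hosting model; on Spaces
# the process is simply recycled.
#   async def shutdown():
#       if mcp_client is not None:
#           await mcp_client.cleanup()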

# ----------------------------
# Helpers for messages
# ----------------------------
def to_llm_messages(history_msgs: List[Dict[str, Any]], user_msg: str) -> List[Dict[str, Any]]:
    """
    Convert Chatbot messages list (role/content dicts) to the LLM format,
    with a system message prepended and the new user message appended.
    """
    msgs: List[Dict[str, Any]] = [{"role": "system", "content": SYSTEM_PROMPT}]
    # Keep only role/content keys
    for m in history_msgs:
        role = m.get("role")
        content = m.get("content")
        if role in ("user", "assistant") and isinstance(content, str):
            msgs.append({"role": role, "content": content})
    msgs.append({"role": "user", "content": user_msg})
    return msgs
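
# Example (illustrative):
#   to_llm_messages([{"role": "user", "content": "Hi"},
#                    {"role": "assistant", "content": "Hello!"}], "What is gr.Blocks?")
#   -> [{"role": "system", "content": SYSTEM_PROMPT},
#       {"role": "user", "content": "Hi"},
#       {"role": "assistant", "content": "Hello!"},
#       {"role": "user", "content": "What is gr.Blocks?"}]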


async def stream_answer(messages_for_llm: List[Dict[str, Any]]) -> AsyncIterator[str]:
    """
    Stream deltas and tool logs from MCPClient.process_single_turn_with_tools.
    """
    await ensure_init()
    client = get_mcp_client()

    # Helpful pre-flight check
    if not HF_TOKEN:
        yield (
            "⚠️ Missing token: set `HUGGING_FACE_HUB_TOKEN` (or `HF_TOKEN`) "
            "in your Space **Settings → Secrets** to authenticate the model call."
        )
        return

    try:
        async for chunk in client.process_single_turn_with_tools(messages_for_llm):
            # The chunk format varies across huggingface_hub versions; handle
            # dicts describing text deltas and tool activity, with a string fallback
            if isinstance(chunk, dict):
                ctype = chunk.get("type")
                if ctype == "tool_log":
                    name = chunk.get("tool", "tool")
                    status = chunk.get("status", "")
                    yield f"\n\n_(using **{name}** {status})_"
                elif ctype == "text_delta":
                    yield chunk.get("delta", "")
                elif ctype == "text":
                    yield chunk.get("text", "")
                elif ctype == "tool_result":
                    content = chunk.get("content")
                    if isinstance(content, str) and content.strip():
                        yield f"\n\n**Result:**\n{content}"
            else:
                # Fallback if the provider yields plain strings
                yield str(chunk)
    except Exception as e:
        msg = str(e)
        if "401" in msg or "Unauthorized" in msg:
            yield (
                "❌ Unauthorized (401). Your model call was rejected.\n\n"
                "- Ensure the Space secret `HUGGING_FACE_HUB_TOKEN` is set and valid.\n"
                "- Confirm the selected `CHAT_MODEL` is accessible with your token.\n"
                "- If using `hf-inference`, verify your org access/limits."
            )
        else:
            yield f"❌ Error: {msg}"

# ----------------------------
# Gradio UI
# ----------------------------
with gr.Blocks(fill_height=True) as demo:
    gr.Markdown(
        "# 🤖 Gradio Docs Chat (MCP Client)\n"
        "Ask anything about **Gradio**. Answers are grounded in the official docs via MCP."
    )

    # Using the new messages format
    chat = gr.Chatbot(
        label="Gradio Docs Assistant",
        height=520,
        type="messages",  # expects a list of dicts: {"role": "...", "content": "..."}
    )

    with gr.Row():
        msg = gr.Textbox(
            placeholder="e.g., How do I use gr.Interface with multiple inputs?",
            scale=9,
            autofocus=True,
        )
        send_btn = gr.Button("Send", scale=1, variant="primary")

    with gr.Row():
        clear = gr.ClearButton(components=[chat, msg], value="Clear")
        info = gr.Markdown(
            f"**Model:** `{MODEL_ID}` · **Provider:** `{PROVIDER}` · **MCP:** Gradio Docs SSE",
        )
    async def on_submit(user_msg: str, history_msgs: List[Dict[str, Any]]):
        """
        history_msgs is a list of {"role": ..., "content": ...} dicts.
        We append the user's message, then stream the assistant reply by
        updating the last assistant message's content in place.
        """
        history_msgs = (history_msgs or []) + [{"role": "user", "content": user_msg}]
        # Add a placeholder assistant message to stream into
        history_msgs.append({"role": "assistant", "content": ""})
        yield history_msgs

        # Build LLM messages and stream chunks. Slice off the just-added
        # user/assistant pair, since to_llm_messages re-appends the user message
        messages_for_llm = to_llm_messages(history_msgs[:-2], user_msg)
        async for delta in stream_answer(messages_for_llm):
            # Append the delta to the last assistant message
            history_msgs[-1]["content"] += delta
            yield history_msgs
    # Wire both Enter and button click
    msg.submit(on_submit, inputs=[msg, chat], outputs=chat, queue=True)
    send_btn.click(on_submit, inputs=[msg, chat], outputs=chat, queue=True)
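    # Possible refinement (not wired up here): clear the textbox once the
    # reply finishes streaming by chaining .then() on either event, e.g.
    #   msg.submit(on_submit, [msg, chat], chat).then(lambda: "", None, msg)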

# For local dev; on Spaces, Gradio calls launch automatically.
if __name__ == "__main__":
    demo.launch()