# app.py
# Hugging Face Space: Gradio app that chats about Gradio docs via the Gradio Docs MCP server.
# Requirements:
# - gradio
# - huggingface_hub
#
# Space secret needed:
# - HUGGING_FACE_HUB_TOKEN (or HF_TOKEN)
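#
# A minimal requirements.txt sketch (version floors are assumptions, not taken
# from this repo; MCPClient needs a recent huggingface_hub release):
#   gradio
#   huggingface_hub>=0.32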
import os
import asyncio
from typing import Any, AsyncIterator, Dict, List, Optional
import gradio as gr
from huggingface_hub import MCPClient
# ----------------------------
# Configuration
# ----------------------------
GRADIO_DOCS_MCP_SSE = os.environ.get(
    "GRADIO_DOCS_MCP_SSE",
    "https://gradio-docs-mcp.hf.space/gradio_api/mcp/sse",
)
# Choose a tool-capable chat model you have access to
MODEL_ID = os.environ.get("CHAT_MODEL", "meta-llama/Meta-Llama-3.1-8B-Instruct")
PROVIDER = os.environ.get("CHAT_PROVIDER", "hf-inference") # e.g., "hf-inference"
HF_TOKEN = os.environ.get("HUGGING_FACE_HUB_TOKEN") or os.environ.get("HF_TOKEN")
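# Local dev sketch (values are examples only; any tool-capable chat model your
# token can access will work):
#   export HF_TOKEN=hf_xxx
#   export CHAT_MODEL=meta-llama/Meta-Llama-3.1-8B-Instruct
#   python app.py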
SYSTEM_PROMPT = (
    "You are a helpful assistant that answers questions strictly using the Gradio documentation "
    "via the MCP tools provided by the Gradio Docs MCP server. Prefer the latest docs. "
    "When helpful, cite classes/functions (e.g., gr.Interface) and include short code examples."
)
# ----------------------------
# MCP Client (lazy init)
# ----------------------------
mcp_client: Optional[MCPClient] = None
_initialized = False
_init_lock = asyncio.Lock()
def get_mcp_client() -> MCPClient:
    global mcp_client
    if mcp_client is None:
        # Pass api_key so provider calls are authenticated (fixes 401)
        mcp_client = MCPClient(model=MODEL_ID, provider=PROVIDER, api_key=HF_TOKEN)
    return mcp_client
async def ensure_init():
    """
    Lazily attach the Gradio Docs MCP server. In some huggingface_hub versions
    add_mcp_server is async, so it must be awaited exactly once.
    """
    global _initialized
    if _initialized:
        return
    async with _init_lock:
        if _initialized:
            return
        client = get_mcp_client()
        # Await the coroutine to avoid a "coroutine was never awaited" warning
        await client.add_mcp_server(
            type="sse",
            url=GRADIO_DOCS_MCP_SSE,
            timeout=30,
            # headers={"Authorization": f"Bearer {HF_TOKEN}"}  # Not needed for the public server
        )
        _initialized = True
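
# A private MCP server could be attached the same way, with auth headers added
# (sketch only; this URL is hypothetical):
#   await client.add_mcp_server(
#       type="sse",
#       url="https://my-org-private-docs.hf.space/gradio_api/mcp/sse",
#       headers={"Authorization": f"Bearer {HF_TOKEN}"},
#       timeout=30,
#   )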
# ----------------------------
# Helpers for messages
# ----------------------------
def to_llm_messages(history_msgs: List[Dict[str, Any]], user_msg: str) -> List[Dict[str, Any]]:
    """
    Convert the Chatbot messages list (role/content dicts) to the LLM format,
    with the system message prepended and the new user message appended.
    """
    msgs: List[Dict[str, Any]] = [{"role": "system", "content": SYSTEM_PROMPT}]
    # Keep only the role/content keys; drop any extra metadata Gradio attaches
    for m in history_msgs:
        role = m.get("role")
        content = m.get("content")
        if role in ("user", "assistant") and isinstance(content, str):
            msgs.append({"role": role, "content": content})
    msgs.append({"role": "user", "content": user_msg})
    return msgs
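
# Illustrative input/output for to_llm_messages (shapes only):
#   to_llm_messages(
#       [{"role": "user", "content": "hi"},
#        {"role": "assistant", "content": "hello"}],
#       "How do I use gr.Blocks?",
#   )
#   -> [{"role": "system", "content": SYSTEM_PROMPT},
#       {"role": "user", "content": "hi"},
#       {"role": "assistant", "content": "hello"},
#       {"role": "user", "content": "How do I use gr.Blocks?"}]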
async def stream_answer(messages_for_llm: List[Dict[str, Any]]) -> AsyncIterator[str]:
    """
    Stream text deltas and tool logs from MCPClient.process_single_turn_with_tools.
    """
    # Pre-flight check: fail fast before touching the MCP server or the model
    if not HF_TOKEN:
        yield (
            "⚠️ Missing token: set `HUGGING_FACE_HUB_TOKEN` (or `HF_TOKEN`) "
            "in your Space **Settings → Secrets** to authenticate the model call."
        )
        return
    await ensure_init()
    client = get_mcp_client()
    try:
        async for chunk in client.process_single_turn_with_tools(messages_for_llm):
            # chunk is a dict describing text deltas and tool activity
            if isinstance(chunk, dict):
                ctype = chunk.get("type")
                if ctype == "tool_log":
                    name = chunk.get("tool", "tool")
                    status = chunk.get("status", "")
                    yield f"\n\n_(using **{name}** {status})_"
                elif ctype == "text_delta":
                    yield chunk.get("delta", "")
                elif ctype == "text":
                    yield chunk.get("text", "")
                elif ctype == "tool_result":
                    content = chunk.get("content")
                    if isinstance(content, str) and content.strip():
                        yield f"\n\n**Result:**\n{content}"
            else:
                # Fallback if the provider yields plain strings
                yield str(chunk)
    except Exception as e:
        msg = str(e)
        if "401" in msg or "Unauthorized" in msg:
            yield (
                "❌ Unauthorized (401). Your model call was rejected.\n\n"
                "- Ensure the Space secret `HUGGING_FACE_HUB_TOKEN` is set and valid.\n"
                "- Confirm the selected `CHAT_MODEL` is accessible with your token.\n"
                "- If using `hf-inference`, verify your org access/limits."
            )
        else:
            yield f"❌ Error: {msg}"
# ----------------------------
# Gradio UI
# ----------------------------
with gr.Blocks(fill_height=True) as demo:
    gr.Markdown(
        "# 🤖 Gradio Docs Chat (MCP Client)\n"
        "Ask anything about **Gradio**. Answers are grounded in the official docs via MCP."
    )
    # Using the new messages format
    chat = gr.Chatbot(
        label="Gradio Docs Assistant",
        height=520,
        type="messages",  # expects a list of dicts: {"role": "...", "content": "..."}
    )
    with gr.Row():
        msg = gr.Textbox(
            placeholder="e.g., How do I use gr.Interface with multiple inputs?",
            scale=9,
            autofocus=True,
        )
        send_btn = gr.Button("Send", scale=1, variant="primary")
    with gr.Row():
        clear = gr.ClearButton(components=[chat, msg], value="Clear")
        info = gr.Markdown(
            f"**Model:** `{MODEL_ID}` · **Provider:** `{PROVIDER}` · **MCP:** Gradio Docs SSE",
        )
    async def on_submit(user_msg: str, history_msgs: List[Dict[str, Any]]):
        """
        history_msgs is a list of {"role": ..., "content": ...} dicts.
        We append the user's message, then stream the assistant reply by
        updating the last assistant message's content in place.
        """
        history_msgs = (history_msgs or []) + [{"role": "user", "content": user_msg}]
        # Add a placeholder assistant message to stream into
        history_msgs.append({"role": "assistant", "content": ""})
        yield history_msgs
        # Build the LLM messages; exclude both the user turn just appended and the
        # assistant placeholder, since to_llm_messages re-appends the user turn
        messages_for_llm = to_llm_messages(history_msgs[:-2], user_msg)
        async for delta in stream_answer(messages_for_llm):
            # Append the delta to the last assistant message
            history_msgs[-1]["content"] += delta
            yield history_msgs

    # Wire both Enter and the Send button click
    msg.submit(on_submit, inputs=[msg, chat], outputs=chat, queue=True)
    send_btn.click(on_submit, inputs=[msg, chat], outputs=chat, queue=True)
# For local dev; on Spaces, Gradio calls launch automatically.
if __name__ == "__main__":
demo.launch()