# app.py
# Hugging Face Space: Gradio app that chats about Gradio docs via the Gradio Docs MCP server.
# Requirements:
# - gradio
# - huggingface_hub
#
# Space secret needed:
# - HUGGING_FACE_HUB_TOKEN (or HF_TOKEN)
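#
# Setup notes (assumptions about a typical environment, not pinned by this file):
# - In recent huggingface_hub releases, MCPClient ships with the `mcp` extra:
#     pip install "huggingface_hub[mcp]"
# - For a local run:
#     export HF_TOKEN=hf_xxx   # a token with Inference Providers access
#     python app.py
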
import os
import asyncio
from typing import Any, AsyncIterator, Dict, List, Optional
import gradio as gr
from huggingface_hub import MCPClient
# ----------------------------
# Configuration
# ----------------------------
GRADIO_DOCS_MCP_SSE = os.environ.get(
    "GRADIO_DOCS_MCP_SSE",
    "https://gradio-docs-mcp.hf.space/gradio_api/mcp/sse",
)

# Choose a tool-capable chat model you have access to
MODEL_ID = os.environ.get("CHAT_MODEL", "meta-llama/Meta-Llama-3.1-8B-Instruct")
PROVIDER = os.environ.get("CHAT_PROVIDER", "hf-inference")  # e.g., "hf-inference"
HF_TOKEN = os.environ.get("HUGGING_FACE_HUB_TOKEN") or os.environ.get("HF_TOKEN")

SYSTEM_PROMPT = (
    "You are a helpful assistant that answers questions strictly using the Gradio documentation "
    "via the MCP tools provided by the Gradio Docs MCP server. Prefer the latest docs. "
    "When helpful, cite classes/functions (e.g., gr.Interface) and include short code examples."
)
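
# Example overrides via Space Variables (hypothetical values; any tool-capable
# chat model you can access should work):
#   CHAT_MODEL=Qwen/Qwen2.5-72B-Instruct
#   CHAT_PROVIDER=hf-inference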
# ----------------------------
# MCP Client (lazy init)
# ----------------------------
mcp_client: Optional[MCPClient] = None
_initialized = False
_init_lock = asyncio.Lock()

def get_mcp_client() -> MCPClient:
    global mcp_client
    if mcp_client is None:
        # Pass api_key so provider calls are authenticated (fixes 401)
        mcp_client = MCPClient(model=MODEL_ID, provider=PROVIDER, api_key=HF_TOKEN)
    return mcp_client

async def ensure_init():
    """
    Lazily attach the Gradio Docs MCP server. In some huggingface_hub versions
    add_mcp_server is async, so it must be awaited exactly once.
    """
    global _initialized
    if _initialized:
        return
    async with _init_lock:
        if _initialized:
            return
        client = get_mcp_client()
        # Await the coroutine to avoid a "was never awaited" warning
        await client.add_mcp_server(
            type="sse",
            url=GRADIO_DOCS_MCP_SSE,
            timeout=30,
            # headers={"Authorization": f"Bearer {HF_TOKEN}"}  # not needed for the public server
        )
        _initialized = True
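
# Compatibility sketch (assumption: in some huggingface_hub versions
# add_mcp_server is synchronous; this Space awaits it as above). A defensive
# variant could inspect the result before awaiting:
#   import inspect
#   result = client.add_mcp_server(type="sse", url=GRADIO_DOCS_MCP_SSE, timeout=30)
#   if inspect.isawaitable(result):
#       await result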
# ----------------------------
# Helpers for messages
# ----------------------------

def to_llm_messages(history_msgs: List[Dict[str, Any]], user_msg: str) -> List[Dict[str, Any]]:
    """
    Convert the Chatbot messages list (role/content dicts) to the LLM format,
    with a system message prepended and the new user message appended.
    """
    msgs: List[Dict[str, Any]] = [{"role": "system", "content": SYSTEM_PROMPT}]
    # Keep only the role/content keys
    for m in history_msgs:
        role = m.get("role")
        content = m.get("content")
        if role in ("user", "assistant") and isinstance(content, str):
            msgs.append({"role": role, "content": content})
    msgs.append({"role": "user", "content": user_msg})
    return msgs
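
# Example (illustrative only):
#   to_llm_messages([{"role": "user", "content": "Hi"},
#                    {"role": "assistant", "content": "Hello!"}],
#                   "What is gr.Blocks?")
# returns the system prompt followed by the history, then the new user turn:
#   [{"role": "system", "content": SYSTEM_PROMPT},
#    {"role": "user", "content": "Hi"},
#    {"role": "assistant", "content": "Hello!"},
#    {"role": "user", "content": "What is gr.Blocks?"}]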

async def stream_answer(messages_for_llm: List[Dict[str, Any]]) -> AsyncIterator[str]:
    """
    Stream text deltas and tool logs from MCPClient.process_single_turn_with_tools.
    """
    # Helpful pre-flight check: fail fast before initializing MCP or calling the model
    if not HF_TOKEN:
        yield (
            "⚠️ Missing token: set `HUGGING_FACE_HUB_TOKEN` (or `HF_TOKEN`) "
            "in your Space **Settings → Secrets** to authenticate the model call."
        )
        return
    await ensure_init()
    client = get_mcp_client()
    try:
        async for chunk in client.process_single_turn_with_tools(messages_for_llm):
            # Each chunk is a dict describing a text delta or tool activity
            if isinstance(chunk, dict):
                ctype = chunk.get("type")
                if ctype == "tool_log":
                    name = chunk.get("tool", "tool")
                    status = chunk.get("status", "")
                    yield f"\n\n_(using **{name}** {status})_"
                elif ctype == "text_delta":
                    yield chunk.get("delta", "")
                elif ctype == "text":
                    yield chunk.get("text", "")
                elif ctype == "tool_result":
                    content = chunk.get("content")
                    if isinstance(content, str) and content.strip():
                        yield f"\n\n**Result:**\n{content}"
            else:
                # Fallback if the provider yields plain strings
                yield str(chunk)
    except Exception as e:
        msg = str(e)
        if "401" in msg or "Unauthorized" in msg:
            yield (
                "❌ Unauthorized (401). Your model call was rejected.\n\n"
                "- Ensure the Space secret `HUGGING_FACE_HUB_TOKEN` is set and valid.\n"
                "- Confirm the selected `CHAT_MODEL` is accessible with your token.\n"
                "- If using `hf-inference`, verify your org access/limits."
            )
        else:
            yield f"❌ Error: {msg}"
# ----------------------------
# Gradio UI
# ----------------------------

with gr.Blocks(fill_height=True) as demo:
    gr.Markdown(
        "# 🤖 Gradio Docs Chat (MCP Client)\n"
        "Ask anything about **Gradio**. Answers are grounded in the official docs via MCP."
    )
    # Using the new messages format
    chat = gr.Chatbot(
        label="Gradio Docs Assistant",
        height=520,
        type="messages",  # expects a list of dicts: {"role": "...", "content": "..."}
    )
    with gr.Row():
        msg = gr.Textbox(
            placeholder="e.g., How do I use gr.Interface with multiple inputs?",
            scale=9,
            autofocus=True,
        )
        send_btn = gr.Button("Send", scale=1, variant="primary")
    with gr.Row():
        clear = gr.ClearButton(components=[chat, msg], value="Clear")
        info = gr.Markdown(
            f"**Model:** `{MODEL_ID}` · **Provider:** `{PROVIDER}` · **MCP:** Gradio Docs SSE",
        )

    async def on_submit(user_msg: str, history_msgs: List[Dict[str, Any]]):
        """
        history_msgs is a list of {"role": ..., "content": ...} dicts.
        We append the user's message, then stream the assistant reply by
        updating/overwriting the last assistant message content.
        """
        history_msgs = (history_msgs or []) + [{"role": "user", "content": user_msg}]
        # Add a placeholder assistant message to stream into
        history_msgs.append({"role": "assistant", "content": ""})
        yield history_msgs
        # Build LLM messages; exclude both the just-appended user message and the
        # assistant placeholder, since to_llm_messages re-appends user_msg itself
        messages_for_llm = to_llm_messages(history_msgs[:-2], user_msg)
        async for delta in stream_answer(messages_for_llm):
            # Append the delta to the last assistant message
            history_msgs[-1]["content"] += delta
            yield history_msgs

    # Wire both Enter and button click
    msg.submit(on_submit, inputs=[msg, chat], outputs=chat, queue=True)
    send_btn.click(on_submit, inputs=[msg, chat], outputs=chat, queue=True)

# For local dev; on Spaces, Gradio calls launch automatically.
if __name__ == "__main__":
    demo.launch()