# app.py
# Hugging Face Space: Gradio app that chats about Gradio docs via the Gradio Docs MCP server.
# Features:
#   • MCP tool-calling (connects to the official Gradio Docs MCP SSE server)
#   • Streaming responses with live tool logs
#   • Optional "Concise / Detailed" answer style
#   • Lightweight citations panel (summarizes MCP tool hits)
#
# Requirements (in requirements.txt):
#   gradio>=4.44.0
#   huggingface_hub>=0.24.0
#   spaces  # only needed if your Space hardware is ZeroGPU
#
# Space secrets needed:
#   - HUGGING_FACE_HUB_TOKEN or HF_TOKEN (token must allow using the selected provider/model)
#
# Notes:
#   - The default model/provider below are known to work with chat + tool calling via the HF router.
#   - You can override via Space Secrets: CHAT_MODEL, CHAT_PROVIDER, GRADIO_DOCS_MCP_SSE.
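#
# Example overrides (illustrative values, not this app's defaults):
#   CHAT_MODEL=Qwen/Qwen2.5-72B-Instruct
#   CHAT_PROVIDER=together
#   GRADIO_DOCS_MCP_SSE=https://gradio-docs-mcp.hf.space/gradio_api/mcp/sse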
import os
import asyncio
import inspect
from typing import Any, AsyncIterator, Dict, List, Optional, Tuple

import gradio as gr
from huggingface_hub import MCPClient

# -------------------------------------------------
# Optional ZeroGPU shim (silences "No @spaces.GPU..." startup message)
# -------------------------------------------------
SPACES_ZERO_GPU = bool(os.environ.get("SPACES_ZERO_GPU", ""))  # HF sets this in ZeroGPU envs

try:
    import spaces  # type: ignore
except Exception:
    spaces = None  # not installed on CPU Spaces; harmless

if spaces is not None:
    # Decorating a no-op function lets ZeroGPU detect a GPU-capable function at startup.
    @spaces.GPU
    def _zero_gpu_probe():
        # We don't actually need a GPU for this app (remote inference),
        # but the decorated function's presence satisfies ZeroGPU's startup check.
        return "ok"

# ----------------------------
# Configuration
# ----------------------------
GRADIO_DOCS_MCP_SSE = os.environ.get(
    "GRADIO_DOCS_MCP_SSE",
    "https://gradio-docs-mcp.hf.space/gradio_api/mcp/sse",
)

# Use a router model that supports OpenAI-style chat + tool calling.
MODEL_ID = os.environ.get("CHAT_MODEL", "meta-llama/Meta-Llama-3.1-8B-Instruct")
PROVIDER = os.environ.get("CHAT_PROVIDER", "hf-inference")  # router
HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")

BASE_SYSTEM_PROMPT = (
    "You are a helpful assistant that answers strictly using the Gradio documentation "
    "via the MCP tools provided by the Gradio Docs MCP server. Prefer the latest docs. "
    "Cite relevant class/function names (e.g., gr.Interface) and include short code examples when helpful."
)
CONCISE_SUFFIX = " Keep answers concise (3-6 sentences) unless code is necessary."
DETAILED_SUFFIX = " Provide a detailed, step-by-step answer with short code where helpful."

# ----------------------------
# MCP Client (lazy init, reconfigurable)
# ----------------------------
mcp_client: Optional[MCPClient] = None
_initialized = False
_init_lock = asyncio.Lock()


def _current_system_prompt(style: str) -> str:
    return BASE_SYSTEM_PROMPT + (CONCISE_SUFFIX if style == "Concise" else DETAILED_SUFFIX)


def _reset_client():
    """Reset the global client so a new one is created with updated env (if any)."""
    global mcp_client, _initialized
    mcp_client = None
    _initialized = False


def get_mcp_client(model_id: str, provider: str, api_key: Optional[str]) -> MCPClient:
    global mcp_client
    if mcp_client is None:
        mcp_client = MCPClient(model=model_id, provider=provider, api_key=api_key)
    return mcp_client


async def _maybe_await(result):
    """Await result if it's an awaitable (handles old/new hub versions)."""
    if inspect.isawaitable(result):
        return await result
    return result


async def ensure_init(model_id: str, provider: str, api_key: Optional[str]):
    """Attach the Gradio Docs MCP server exactly once (supports async/sync add_mcp_server)."""
    global _initialized
    if _initialized:
        return
    async with _init_lock:
        if _initialized:
            return
        client = get_mcp_client(model_id, provider, api_key)
        # add_mcp_server may be a coroutine or sync depending on the huggingface_hub version
        await _maybe_await(
            client.add_mcp_server(
                type="sse",
                url=GRADIO_DOCS_MCP_SSE,
                timeout=45,
            )
        )
        _initialized = True

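# Note: call _reset_client() first if you need to re-attach with different env values;
# otherwise ensure_init() acts as a one-shot guard for the lifetime of the process.
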
# ----------------------------
# Message helpers
# ----------------------------
def to_llm_messages(history_msgs: List[Dict[str, Any]], user_msg: str, style: str) -> List[Dict[str, Any]]:
    """
    Convert the Chatbot messages list (role/content dicts) to the LLM format,
    with a system message prepended and the new user message appended.
    """
    msgs: List[Dict[str, Any]] = [{"role": "system", "content": _current_system_prompt(style)}]
    for m in history_msgs or []:
        role = m.get("role")
        content = m.get("content")
        if role in ("user", "assistant") and isinstance(content, str):
            msgs.append({"role": role, "content": content})
    msgs.append({"role": "user", "content": user_msg})
    return msgs

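# Illustrative shape of the conversion above (made-up values, not produced by this app):
#   to_llm_messages(
#       [{"role": "user", "content": "hi"}, {"role": "assistant", "content": "hello"}],
#       "How do I add examples to gr.Interface?",
#       "Concise",
#   )
#   -> [{"role": "system", "content": "<system prompt + concise suffix>"},
#       {"role": "user", "content": "hi"},
#       {"role": "assistant", "content": "hello"},
#       {"role": "user", "content": "How do I add examples to gr.Interface?"}]
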
# ----------------------------
# Streaming + side panels (tool logs & citations)
# ----------------------------
def _append_log(log_lines: List[str], line: str, max_lines: int = 200) -> None:
    log_lines.append(line)
    if len(log_lines) > max_lines:
        del log_lines[: len(log_lines) - max_lines]


def _format_tool_log(log_lines: List[str]) -> str:
    if not log_lines:
        return "_No tool activity yet._"
    return "\n".join(log_lines)


def _format_citations(cites: List[Tuple[str, Optional[str]]]) -> str:
    if not cites:
        return "_No citations captured yet._"
    last = cites[-12:]  # show recent
    lines = []
    for (label, url) in last:
        if url:
            lines.append(f"- **{label}** → {url}")
        else:
            lines.append(f"- **{label}**")
    return "\n".join(lines)

async def stream_answer(
    messages_for_llm: List[Dict[str, Any]],
    model_id: str,
    provider: str,
    api_key: Optional[str],
) -> AsyncIterator[Dict[str, Any]]:
    """
    Stream deltas and tool logs from MCPClient.process_single_turn_with_tools.
    Yield dicts with partial text plus updated tool logs & citations so the UI can update three panels.
    """
    await ensure_init(model_id, provider, api_key)
    client = get_mcp_client(model_id, provider, api_key)

    tool_log: List[str] = []
    citations: List[Tuple[str, Optional[str]]] = []  # (label, url)

    # Early token check
    if not api_key:
        yield {
            "delta": (
                "⚠️ Missing token: set `HUGGING_FACE_HUB_TOKEN` or `HF_TOKEN` "
                "in your Space **Settings → Secrets** to authenticate model calls."
            ),
            "tool_log": _format_tool_log(tool_log),
            "citations": _format_citations(citations),
        }
        return
    try:
        async for chunk in client.process_single_turn_with_tools(messages_for_llm):
            if isinstance(chunk, dict):
                ctype = chunk.get("type")

                if ctype == "tool_log":
                    # Example: {"type": "tool_log", "tool": "...", "status": "started/finished"}
                    name = chunk.get("tool", "tool")
                    status = chunk.get("status", "")
                    _append_log(tool_log, f"- {name} **{status}**")
                    yield {"delta": "", "tool_log": _format_tool_log(tool_log), "citations": _format_citations(citations)}

                elif ctype == "text_delta":
                    yield {"delta": chunk.get("delta", ""), "tool_log": _format_tool_log(tool_log), "citations": _format_citations(citations)}

                elif ctype == "text":
                    yield {"delta": chunk.get("text", ""), "tool_log": _format_tool_log(tool_log), "citations": _format_citations(citations)}

                elif ctype == "tool_result":
                    # Capture a useful citation label and optional URL if present
                    tool_name = chunk.get("tool", "tool")
                    content = chunk.get("content")
                    url = None
                    if isinstance(content, dict):
                        url = content.get("url") or content.get("link")
                        title = content.get("title") or content.get("name")
                        label = title or tool_name
                    elif isinstance(content, str):
                        label = tool_name
                        if "http://" in content or "https://" in content:
                            start = content.find("http")
                            url = content[start : start + 200].split("\n")[0].strip()
                    else:
                        label = tool_name
                    citations.append((label, url))
                    _append_log(tool_log, f"  • {tool_name} returned result")

                    snippet = ""
                    if isinstance(content, str):
                        snippet = content.strip()
                        if len(snippet) > 700:
                            snippet = snippet[:700] + "…"
                        snippet = f"\n\n**Result (from {tool_name}):**\n{snippet}"

                    yield {
                        "delta": snippet,
                        "tool_log": _format_tool_log(tool_log),
                        "citations": _format_citations(citations),
                    }
            else:
                # Fallback if the provider yields plain strings
                yield {"delta": str(chunk), "tool_log": _format_tool_log(tool_log), "citations": _format_citations(citations)}

    except Exception as e:
        msg = str(e)
        if "401" in msg or "Unauthorized" in msg:
            err = (
                "❌ Unauthorized (401). Ensure your token is set and permitted for the chosen model/provider.\n"
                f"- Model: `{model_id}`\n- Provider: `{provider}`\n"
            )
        elif "404" in msg or "Not Found" in msg:
            err = (
                "❌ Model not found (404). The default model may not be available via hf-inference.\n"
                "Consider setting `CHAT_MODEL` in your Space settings to a model that supports chat via the HF router."
            )
        else:
            err = f"❌ Error: {msg}"
        yield {"delta": err, "tool_log": _format_tool_log(tool_log), "citations": _format_citations(citations)}

# ----------------------------
# Gradio UI
# ----------------------------
with gr.Blocks(fill_height=True) as demo:
    gr.Markdown(
        "# 🤖 Gradio Docs Chat (MCP Client)\n"
        "Ask anything about **Gradio**. Answers are grounded in the official docs via MCP tools."
    )

    with gr.Row():
        with gr.Column(scale=7):
            chat = gr.Chatbot(
                label="Gradio Docs Assistant",
                height=520,
                type="messages",  # expects: [{"role": "...", "content": "..."}]
            )
            with gr.Row():
                msg = gr.Textbox(
                    placeholder="e.g., How do I use gr.Interface with multiple inputs?",
                    scale=9,
                    autofocus=True,
                )
                send_btn = gr.Button("Send", scale=1, variant="primary")
            clear = gr.ClearButton(components=[chat, msg], value="Clear")

        with gr.Column(scale=5):
            with gr.Accordion("⚙️ Settings", open=False):
                style = gr.Radio(
                    label="Answer Style",
                    choices=["Concise", "Detailed"],
                    value="Detailed",
                )
                model_read = gr.Markdown(
                    f"**Model:** `{MODEL_ID}`  \n**Provider:** `{PROVIDER}`  \n"
                    "_(override via Space Secrets: `CHAT_MODEL`, `CHAT_PROVIDER`)_"
                )
            with gr.Accordion("🔧 Tool Activity (live)", open=True):
                tool_log_md = gr.Markdown("_No tool activity yet._")
            with gr.Accordion("📚 Citations (recent)", open=True):
                citations_md = gr.Markdown("_No citations captured yet._")

    async def on_submit(user_msg: str, history_msgs: List[Dict[str, Any]], style_choice: str):
        """
        Append the user's message, then stream the assistant reply while updating:
          - chat text
          - tool activity
          - citations
        """
        # Start a new assistant message for streaming
        history_msgs = (history_msgs or []) + [{"role": "user", "content": user_msg}]
        history_msgs.append({"role": "assistant", "content": ""})
        yield history_msgs, gr.update(value="_No tool activity yet._"), gr.update(value="_No citations captured yet._")

        # Compose messages for the LLM. Exclude the just-added user turn and the empty
        # assistant placeholder; to_llm_messages re-appends the user message itself.
        messages_for_llm = to_llm_messages(history_msgs[:-2], user_msg, style_choice)

        async for chunk in stream_answer(messages_for_llm, MODEL_ID, PROVIDER, HF_TOKEN):
            delta = chunk.get("delta", "")
            if delta:
                history_msgs[-1]["content"] += delta
            yield history_msgs, gr.update(value=chunk.get("tool_log", "")), gr.update(value=chunk.get("citations", ""))

    # Wire both Enter and the Send button; also pass the "style" choice
    msg.submit(on_submit, inputs=[msg, chat, style], outputs=[chat, tool_log_md, citations_md], queue=True)
    send_btn.click(on_submit, inputs=[msg, chat, style], outputs=[chat, tool_log_md, citations_md], queue=True)

# ----------------------------
# Gradio runtime (queue + launch)
# ----------------------------
# IMPORTANT: assign the queued app back to 'demo' for older Gradio versions.
demo = demo.queue(max_size=32)

# Always launch; Spaces runs this script directly. Banner lines about the "local URL" are normal.
demo.launch(
    ssr_mode=False,
)
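
# Local testing sketch (assumption: dependencies installed and a valid HF token exported):
#   $ pip install "gradio>=4.44.0" "huggingface_hub>=0.24.0"
#   $ HF_TOKEN=hf_xxx python app.py
#   # then open the local URL Gradio prints (http://127.0.0.1:7860 by default)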