# app.py
# Hugging Face Space: Gradio app that chats about Gradio docs via the Gradio Docs MCP server.
# Requirements:
#   - gradio
#   - huggingface_hub
#
# Space secret needed:
#   - HUGGING_FACE_HUB_TOKEN or HF_TOKEN (must have access to provider="novita")

import os
import asyncio
from typing import Any, AsyncIterator, Dict, List, Optional

import gradio as gr
from huggingface_hub import MCPClient

# ----------------------------
# Configuration
# ----------------------------
GRADIO_DOCS_MCP_SSE = os.environ.get(
    "GRADIO_DOCS_MCP_SSE",
    "https://gradio-docs-mcp.hf.space/gradio_api/mcp/sse",
)

# Novita provider + the chosen model (both overridable via env vars)
MODEL_ID = os.environ.get("CHAT_MODEL", "openai/gpt-oss-20b")
PROVIDER = os.environ.get("CHAT_PROVIDER", "novita")  # <-- IMPORTANT

# Accept either env name
HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")

SYSTEM_PROMPT = (
    "You are a helpful assistant that answers questions strictly using the Gradio documentation "
    "via the MCP tools provided by the Gradio Docs MCP server. Prefer the latest docs. "
    "When helpful, cite classes/functions (e.g., gr.Interface) and include short code examples."
)

# ----------------------------
# MCP Client (lazy init)
# ----------------------------
mcp_client: Optional[MCPClient] = None
_initialized = False
_init_lock = asyncio.Lock()


def get_mcp_client() -> MCPClient:
    """
    Create a single global MCPClient configured to use provider='novita'
    and the given MODEL_ID. MCPClient wraps huggingface_hub's InferenceClient,
    so this behaves like a direct Novita chat-completion call.
    """
    global mcp_client
    if mcp_client is None:
        mcp_client = MCPClient(
            model=MODEL_ID,
            provider=PROVIDER,  # novita
            api_key=HF_TOKEN,   # token must permit novita access
        )
    return mcp_client


async def ensure_init():
    """
    Lazily attach the Gradio Docs MCP server. In some huggingface_hub versions
    add_mcp_server is async, so it must be awaited exactly once.
    """
    global _initialized
    if _initialized:
        return
    async with _init_lock:
        if _initialized:
            return
        client = get_mcp_client()
        # Await the coroutine to avoid "was never awaited" warnings
        await client.add_mcp_server(
            type="sse",
            url=GRADIO_DOCS_MCP_SSE,
            timeout=30,
        )
        _initialized = True
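# Note: the docstring above says add_mcp_server is async only in some
# huggingface_hub versions. If you need to tolerate both sync and async
# variants, one hedged sketch (not wired into ensure_init) is to detect
# an awaitable at runtime:
#
#     import inspect
#
#     result = client.add_mcp_server(type="sse", url=GRADIO_DOCS_MCP_SSE, timeout=30)
#     if inspect.isawaitable(result):
#         await result  # async variant returns a coroutine; sync variant returns None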
# ----------------------------
# Helpers for messages
# ----------------------------
def to_llm_messages(history_msgs: List[Dict[str, Any]], user_msg: str) -> List[Dict[str, Any]]:
    """
    Convert the Chatbot messages list (role/content dicts) to the LLM format,
    with a system message prepended and the new user message appended.
    """
    msgs: List[Dict[str, Any]] = [{"role": "system", "content": SYSTEM_PROMPT}]
    for m in history_msgs or []:
        role = m.get("role")
        content = m.get("content")
        if role in ("user", "assistant") and isinstance(content, str):
            msgs.append({"role": role, "content": content})
    msgs.append({"role": "user", "content": user_msg})
    return msgs


async def stream_answer(messages_for_llm: List[Dict[str, Any]]) -> AsyncIterator[str]:
    """
    Stream deltas and tool logs from MCPClient.process_single_turn_with_tools.
    """
    # Pre-flight check: fail fast before connecting to the MCP server
    if not HF_TOKEN:
        yield (
            "⚠️ Missing token: set `HF_TOKEN` (or `HUGGING_FACE_HUB_TOKEN`) "
            "in your Space **Settings → Secrets**. The token must allow provider='novita'."
        )
        return

    await ensure_init()
    client = get_mcp_client()

    try:
        async for chunk in client.process_single_turn_with_tools(messages_for_llm):
            if isinstance(chunk, dict):
                ctype = chunk.get("type")
                if ctype == "tool_log":
                    name = chunk.get("tool", "tool")
                    status = chunk.get("status", "")
                    yield f"\n\n_(using **{name}** {status})_"
                elif ctype == "text_delta":
                    yield chunk.get("delta", "")
                elif ctype == "text":
                    yield chunk.get("text", "")
                elif ctype == "tool_result":
                    content = chunk.get("content")
                    if isinstance(content, str) and content.strip():
                        yield f"\n\n**Result:**\n{content}"
            else:
                yield str(chunk)
    except Exception as e:
        msg = str(e)
        # Common failure modes
        if "401" in msg or "Unauthorized" in msg:
            yield (
                "❌ Unauthorized (401). Your model call was rejected by Novita.\n\n"
                "- Ensure the Space secret `HF_TOKEN` is set and valid.\n"
                "- Confirm `HF_TOKEN` has access to provider='novita' and the model.\n"
            )
        elif "400" in msg or "Bad Request" in msg:
            yield (
                "❌ Bad Request (400). The Novita endpoint rejected the request.\n\n"
                "- Double-check `CHAT_MODEL` (currently "
                f"`{MODEL_ID}`) is valid for provider='novita'.\n"
                "- Ensure your `HF_TOKEN` has the necessary permissions.\n"
            )
        else:
            yield f"❌ Error: {msg}"


# ----------------------------
# Gradio UI
# ----------------------------
with gr.Blocks(fill_height=True) as demo:
    gr.Markdown(
        "# 🤖 Gradio Docs Chat (MCP Client via Novita)\n"
        "Ask anything about **Gradio**. Answers are grounded in the official docs via MCP."
    )

    chat = gr.Chatbot(
        label="Gradio Docs Assistant",
        height=520,
        type="messages",  # expects: [{"role": "...", "content": "..."}]
    )

    with gr.Row():
        msg = gr.Textbox(
            placeholder="e.g., How do I use gr.Interface with multiple inputs?",
            scale=9,
            autofocus=True,
        )
        send_btn = gr.Button("Send", scale=1, variant="primary")

    with gr.Row():
        clear = gr.ClearButton(components=[chat, msg], value="Clear")
        info = gr.Markdown(
            f"**Model:** `{MODEL_ID}` · **Provider:** `{PROVIDER}` · **MCP:** Gradio Docs SSE",
        )

    async def on_submit(user_msg: str, history_msgs: List[Dict[str, Any]]):
        """
        history_msgs is a list of {"role": ..., "content": ...} dicts.
        We append the user's message plus an empty assistant message, then
        stream the reply by growing that last assistant message's content.
        """
        history_msgs = (history_msgs or []) + [{"role": "user", "content": user_msg}]
        history_msgs.append({"role": "assistant", "content": ""})
        yield history_msgs

        # Exclude both the assistant placeholder and the new user message;
        # to_llm_messages re-appends user_msg, so slicing [:-1] would duplicate it.
        messages_for_llm = to_llm_messages(history_msgs[:-2], user_msg)

        async for delta in stream_answer(messages_for_llm):
            history_msgs[-1]["content"] += delta
            yield history_msgs

    msg.submit(on_submit, inputs=[msg, chat], outputs=chat, queue=True)
    send_btn.click(on_submit, inputs=[msg, chat], outputs=chat, queue=True)

if __name__ == "__main__":
    demo.launch()
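
# ----------------------------
# Optional: provider token check (sketch)
# ----------------------------
# Because MCPClient wraps huggingface_hub's InferenceClient, a quick way to
# verify that HF_TOKEN can actually reach provider="novita" is a direct chat
# call with no MCP involved. A minimal sketch, assuming the same model as the
# MODEL_ID default above (run it separately, not inside this Space):
#
#     from huggingface_hub import InferenceClient
#
#     client = InferenceClient(provider="novita", api_key=os.environ["HF_TOKEN"])
#     out = client.chat_completion(
#         model="openai/gpt-oss-20b",
#         messages=[{"role": "user", "content": "ping"}],
#         max_tokens=8,
#     )
#     print(out.choices[0].message.content)  # any reply means the token works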