import asyncio
import os
from typing import Any, AsyncIterator, Dict, List
import gradio as gr
from huggingface_hub import MCPClient
# ---- CONFIG ----
# The official Gradio Docs MCP Server (SSE) endpoint
GRADIO_DOCS_MCP_SSE = "https://gradio-docs-mcp.hf.space/gradio_api/mcp/sse"
# Choose an LLM that supports tool-calling via HF Inference
# You can switch to any provider/model you have access to.
MODEL_ID = os.environ.get("CHAT_MODEL", "meta-llama/Meta-Llama-3.1-8B-Instruct")
PROVIDER = os.environ.get("CHAT_PROVIDER", "hf-inference") # or "auto"
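# Optional: gated models (like Meta-Llama-3.1) need an HF token with access.
# MCPClient accepts an optional api_key; huggingface_hub also falls back to
# the HF_TOKEN env var on its own, so passing it below is for explicitness.
HF_TOKEN = os.environ.get("HF_TOKEN")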
# Single global client for the Space runtime. It talks to a tool-capable
# model via Hugging Face Inference; the Gradio Docs MCP server is registered
# lazily, because MCPClient.add_mcp_server() is a coroutine and cannot be
# awaited at import time.
mcp_client = MCPClient(model=MODEL_ID, provider=PROVIDER, api_key=HF_TOKEN)

_mcp_ready = False
_mcp_lock = asyncio.Lock()


async def ensure_mcp_server() -> None:
    """
    Register the Gradio Docs MCP (SSE) server exactly once, on first use.
    """
    global _mcp_ready
    async with _mcp_lock:
        if not _mcp_ready:
            await mcp_client.add_mcp_server(
                type="sse",
                url=GRADIO_DOCS_MCP_SSE,
                # Optionally pass headers=..., timeout=..., sse_read_timeout=...
            )
            _mcp_ready = True
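# Optional sanity check for local debugging. NOTE: `available_tools` is an
# assumption about huggingface_hub's MCPClient internals (it stores the tools
# fetched from registered servers); verify against your installed version.
#
#   async def _list_tools():
#       await ensure_mcp_server()
#       for tool in mcp_client.available_tools:
#           print(tool.function.name)
#
#   asyncio.run(_list_tools())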
SYSTEM_PROMPT = (
"You are a helpful assistant that answers questions strictly using the "
"Gradio documentation via the MCP tools provided. Prefer the latest docs. "
"Cite function/class names from the docs and include short code examples when relevant."
)
def to_messages(history: List[Dict[str, Any]], user_msg: str) -> List[Dict[str, Any]]:
    # Add a system message up front, then replay the chat history.
    messages: List[Dict[str, Any]] = [{"role": "system", "content": SYSTEM_PROMPT}]
    # With type="messages", gr.Chatbot stores history as openai-style dicts:
    # [{"role": "user", "content": ...}, {"role": "assistant", "content": ...}]
    for turn in history:
        role = turn.get("role")
        content = turn.get("content")
        if role in ("user", "assistant") and content:
            messages.append({"role": role, "content": str(content)})
    messages.append({"role": "user", "content": user_msg})
    return messages
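# For illustration, a one-turn history produces:
#
#   to_messages(
#       [{"role": "user", "content": "hi"},
#        {"role": "assistant", "content": "hello!"}],
#       "How do I stream output?",
#   )
#   == [{"role": "system", "content": SYSTEM_PROMPT},
#       {"role": "user", "content": "hi"},
#       {"role": "assistant", "content": "hello!"},
#       {"role": "user", "content": "How do I stream output?"}]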
async def stream_answer(messages: List[Dict[str, Any]]) -> AsyncIterator[str]:
    """
    Stream text from the MCPClient while it injects and executes the Gradio
    Docs MCP tools under the hood. process_single_turn_with_tools will:
      - list tools from the server
      - give them to the model
      - execute tools if the model chooses to call them
      - stream back model text and tool results
    """
    await ensure_mcp_server()
    async for chunk in mcp_client.process_single_turn_with_tools(messages):
        # Model output arrives as ChatCompletionStreamOutput chunks whose
        # .choices[0].delta carries incremental text and/or tool calls.
        choices = getattr(chunk, "choices", None)
        if choices:
            delta = choices[0].delta
            if getattr(delta, "content", None):
                yield delta.content
            tool_calls = getattr(delta, "tool_calls", None) or []
            if not isinstance(tool_calls, list):
                tool_calls = [tool_calls]
            for call in tool_calls:
                # Show tool activity as small, inline updates
                name = getattr(getattr(call, "function", None), "name", None)
                if name:
                    yield f"\n\n_(using **{name}**)_"
        elif getattr(chunk, "role", None) == "tool":
            # Tool results come back as role="tool" messages; nicely format
            # any text content they carry.
            content = getattr(chunk, "content", None)
            if isinstance(content, str) and content.strip():
                yield f"\n\n**Result:**\n{content}"
        else:
            # Fallback if a provider returns plain text or another shape
            yield str(chunk)
async def respond(user_msg: str, history: List[Dict[str, Any]]) -> AsyncIterator[str]:
    # Yield a growing string so the Chatbot message updates in place.
    messages = to_messages(history, user_msg)
    partial = ""
    async for piece in stream_answer(messages):
        partial += piece
        yield partial
with gr.Blocks(fill_height=True) as demo:
gr.Markdown("# 🤖 Gradio Docs Chat (MCP Client)\nAsk anything about Gradio—answers are grounded in the official docs via MCP.")
chat = gr.Chatbot(height=520, type="messages")
msg = gr.Textbox(placeholder="e.g., How do I use gr.Interface with multiple inputs?", scale=1)
with gr.Row():
clear = gr.ClearButton(components=[chat], value="Clear")
    async def on_submit(user_msg, history):
        # Append the new user turn plus an empty assistant turn to stream into.
        history = history + [
            {"role": "user", "content": user_msg},
            {"role": "assistant", "content": ""},
        ]
        async for chunk in respond(user_msg, history[:-2]):
            history[-1]["content"] = chunk
            yield history
msg.submit(
fn=on_submit,
inputs=[msg, chat],
outputs=chat,
)
if __name__ == "__main__":
# Spaces will call demo.launch() automatically, but keep for local dev
demo.launch()
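# Example local run (CHAT_MODEL / CHAT_PROVIDER / HF_TOKEN are the env
# overrides defined above; the token value is a placeholder):
#   HF_TOKEN=hf_xxx CHAT_MODEL=meta-llama/Meta-Llama-3.1-8B-Instruct python app.py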