# app.py
# Hugging Face Space: Gradio app that chats about Gradio docs via the Gradio Docs MCP server.
# Requirements:
# - gradio
# - huggingface_hub
#
# Space secret needed:
# - HUGGING_FACE_HUB_TOKEN or HF_TOKEN (must have access to provider="novita")
import os
import asyncio
from typing import Any, AsyncIterator, Dict, List, Optional
import gradio as gr
from huggingface_hub import MCPClient
# ----------------------------
# Configuration
# ----------------------------
GRADIO_DOCS_MCP_SSE = os.environ.get(
"GRADIO_DOCS_MCP_SSE",
"https://gradio-docs-mcp.hf.space/gradio_api/mcp/sse",
)
# Use the Novita inference provider with the configured model
MODEL_ID = os.environ.get("CHAT_MODEL", "openai/gpt-oss-20b")
PROVIDER = os.environ.get("CHAT_PROVIDER", "novita") # <-- IMPORTANT
# Accept either env name
HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")
SYSTEM_PROMPT = (
"You are a helpful assistant that answers questions strictly using the Gradio documentation "
"via the MCP tools provided by the Gradio Docs MCP server. Prefer the latest docs. "
"When helpful, cite classes/functions (e.g., gr.Interface) and include short code examples."
)
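# Note: this prompt is prepended to every conversation in to_llm_messages() below.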
# ----------------------------
# MCP Client (lazy init)
# ----------------------------
mcp_client: Optional[MCPClient] = None
_initialized = False
_init_lock = asyncio.Lock()
def get_mcp_client() -> MCPClient:
"""
Create a single global MCPClient configured to use provider='novita'
and the given MODEL_ID. MCPClient internally uses huggingface_hub's
    InferenceClient, so this is equivalent to calling the Novita endpoint directly.
"""
global mcp_client
if mcp_client is None:
mcp_client = MCPClient(
model=MODEL_ID,
provider=PROVIDER, # novita
api_key=HF_TOKEN, # token must permit novita access
)
return mcp_client
async def ensure_init():
"""
Lazily attach the Gradio Docs MCP server. In some huggingface_hub versions
add_mcp_server is async, so it must be awaited exactly once.
"""
global _initialized
if _initialized:
return
async with _init_lock:
if _initialized:
return
client = get_mcp_client()
# Await the coroutine to avoid "was never awaited" warnings
await client.add_mcp_server(
type="sse",
url=GRADIO_DOCS_MCP_SSE,
timeout=30,
)
_initialized = True
# ----------------------------
# Helpers for messages
# ----------------------------
def to_llm_messages(history_msgs: List[Dict[str, Any]], user_msg: str) -> List[Dict[str, Any]]:
"""
Convert Chatbot messages list (role/content dicts) to the LLM format,
with a system message prepended and the new user message appended.
"""
msgs: List[Dict[str, Any]] = [{"role": "system", "content": SYSTEM_PROMPT}]
for m in history_msgs or []:
role = m.get("role")
content = m.get("content")
if role in ("user", "assistant") and isinstance(content, str):
msgs.append({"role": role, "content": content})
msgs.append({"role": "user", "content": user_msg})
return msgs
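# Example: with history [{"role": "user", "content": "Hi"},
#                        {"role": "assistant", "content": "Hello!"}]
# and user_msg "What does gr.Blocks do?", this returns the system prompt,
# both history turns, and the new user turn, in that order.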
async def stream_answer(messages_for_llm: List[Dict[str, Any]]) -> AsyncIterator[str]:
"""
Stream deltas and tool logs from MCPClient.process_single_turn_with_tools.
"""
await ensure_init()
client = get_mcp_client()
# Pre-flight checks
if not HF_TOKEN:
yield (
"⚠️ Missing token: set `HF_TOKEN` (or `HUGGING_FACE_HUB_TOKEN`) "
"in your Space **Settings → Secrets**. The token must allow provider='novita'."
)
return
try:
async for chunk in client.process_single_turn_with_tools(messages_for_llm):
if isinstance(chunk, dict):
ctype = chunk.get("type")
if ctype == "tool_log":
name = chunk.get("tool", "tool")
status = chunk.get("status", "")
yield f"\n\n_(using **{name}** {status})_"
elif ctype == "text_delta":
yield chunk.get("delta", "")
elif ctype == "text":
yield chunk.get("text", "")
elif ctype == "tool_result":
content = chunk.get("content")
if isinstance(content, str) and content.strip():
yield f"\n\n**Result:**\n{content}"
else:
yield str(chunk)
except Exception as e:
msg = str(e)
# Common failure modes
if "401" in msg or "Unauthorized" in msg:
yield (
"❌ Unauthorized (401). Your model call was rejected by Novita.\n\n"
"- Ensure the Space secret `HF_TOKEN` is set and valid.\n"
"- Confirm `HF_TOKEN` has access to provider='novita' and the model.\n"
)
elif "400" in msg or "Bad Request" in msg:
yield (
"❌ Bad Request (400). The Novita endpoint rejected the request.\n\n"
"- Double-check `CHAT_MODEL` (currently "
f"`{MODEL_ID}`) is valid for provider='novita'.\n"
"- Ensure your `HF_TOKEN` has the necessary permissions.\n"
)
else:
yield f"❌ Error: {msg}"
# ----------------------------
# Gradio UI
# ----------------------------
with gr.Blocks(fill_height=True) as demo:
gr.Markdown(
"# 🤖 Gradio Docs Chat (MCP Client via Novita)\n"
"Ask anything about **Gradio**. Answers are grounded in the official docs via MCP."
)
chat = gr.Chatbot(
label="Gradio Docs Assistant",
height=520,
type="messages", # expects: [{"role": "...", "content": "..."}]
)
with gr.Row():
msg = gr.Textbox(
placeholder="e.g., How do I use gr.Interface with multiple inputs?",
scale=9,
autofocus=True,
)
send_btn = gr.Button("Send", scale=1, variant="primary")
with gr.Row():
clear = gr.ClearButton(components=[chat, msg], value="Clear")
info = gr.Markdown(
f"**Model:** `{MODEL_ID}` · **Provider:** `{PROVIDER}` · **MCP:** Gradio Docs SSE",
)
async def on_submit(user_msg: str, history_msgs: List[Dict[str, Any]]):
"""
history_msgs is a list of {"role": ..., "content": ...} dicts.
We append the user's message, then stream the assistant reply by
updating the last assistant message content.
"""
history_msgs = (history_msgs or []) + [{"role": "user", "content": user_msg}]
history_msgs.append({"role": "assistant", "content": ""})
yield history_msgs
        # Pass prior turns only: to_llm_messages appends the user turn itself, so
        # exclude the just-added user message and the assistant placeholder to
        # avoid sending the user message twice.
        messages_for_llm = to_llm_messages(history_msgs[:-2], user_msg)
async for delta in stream_answer(messages_for_llm):
history_msgs[-1]["content"] += delta
yield history_msgs
msg.submit(on_submit, inputs=[msg, chat], outputs=chat, queue=True)
send_btn.click(on_submit, inputs=[msg, chat], outputs=chat, queue=True)
if __name__ == "__main__":
demo.launch() |