Update app.py
app.py
CHANGED
@@ -5,7 +5,7 @@
 # - huggingface_hub
 #
 # Space secret needed:
-# - HUGGING_FACE_HUB_TOKEN
+# - HUGGING_FACE_HUB_TOKEN or HF_TOKEN (must have access to provider="novita")
 
 import os
 import asyncio
@@ -22,10 +22,11 @@ GRADIO_DOCS_MCP_SSE = os.environ.get(
     "https://gradio-docs-mcp.hf.space/gradio_api/mcp/sse",
 )
 
-#
+# Use Novita provider + the model you specified
 MODEL_ID = os.environ.get("CHAT_MODEL", "openai/gpt-oss-20b")
-PROVIDER = os.environ.get("CHAT_PROVIDER", "
-
+PROVIDER = os.environ.get("CHAT_PROVIDER", "novita")  # <-- IMPORTANT
+# Accept either env name
+HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")
 
 SYSTEM_PROMPT = (
     "You are a helpful assistant that answers questions strictly using the Gradio documentation "
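For reference, the `PROVIDER` / `MODEL_ID` / `HF_TOKEN` trio above corresponds to a direct `huggingface_hub` call like the following minimal sketch (not part of this commit; it assumes the token is allowed to use the Novita inference provider):

```python
# Sketch: the same model/provider/token used directly, without MCP.
# Assumes HF_TOKEN is exported and allowed to use provider="novita".
import os
from huggingface_hub import InferenceClient

client = InferenceClient(
    provider="novita",                  # same value as PROVIDER above
    api_key=os.environ["HF_TOKEN"],     # same token the Space reads
)
resp = client.chat_completion(
    model="openai/gpt-oss-20b",         # same default as MODEL_ID
    messages=[{"role": "user", "content": "Say hello"}],
    max_tokens=32,
)
print(resp.choices[0].message.content)
```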
@@ -42,10 +43,18 @@ _init_lock = asyncio.Lock()
 
 
 def get_mcp_client() -> MCPClient:
+    """
+    Create a single global MCPClient configured to use provider='novita'
+    and the given MODEL_ID. MCPClient internally uses huggingface_hub's
+    InferenceClient, so this matches your direct-Novita snippet.
+    """
     global mcp_client
     if mcp_client is None:
-
-
+        mcp_client = MCPClient(
+            model=MODEL_ID,
+            provider=PROVIDER,  # novita
+            api_key=HF_TOKEN,   # token must permit novita access
+        )
     return mcp_client
 
 
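One caveat worth noting: `get_mcp_client()` is a check-then-create, so two coroutines racing through it before the first construction finishes could in principle build two clients. A sketch of a lock-guarded variant, reusing the `_init_lock` from the hunk header and app.py's module globals:

```python
# Sketch: race-free lazy construction using the module-level _init_lock.
# Relies on app.py's globals (mcp_client, MODEL_ID, PROVIDER, HF_TOKEN).
async def get_mcp_client_async() -> MCPClient:
    global mcp_client
    if mcp_client is None:
        async with _init_lock:
            if mcp_client is None:  # re-check after acquiring the lock
                mcp_client = MCPClient(
                    model=MODEL_ID,
                    provider=PROVIDER,
                    api_key=HF_TOKEN,
                )
    return mcp_client
```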
@@ -62,12 +71,11 @@ async def ensure_init():
     if _initialized:
         return
     client = get_mcp_client()
-    # Await the coroutine to avoid "was never awaited"
+    # Await the coroutine to avoid "was never awaited" warnings
     await client.add_mcp_server(
         type="sse",
         url=GRADIO_DOCS_MCP_SSE,
         timeout=30,
-        # headers={"Authorization": f"Bearer {HF_TOKEN}"}  # Not needed for public server
     )
     _initialized = True
 
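The connection set up here can be smoke-tested outside the Space with a short standalone script. This sketch uses only the calls that appear in this diff and assumes `HF_TOKEN` is exported locally:

```python
# Sketch: standalone smoke test for the MCP connection and one model turn.
# Uses only calls that appear in app.py; assumes HF_TOKEN is exported.
import asyncio
import os

from huggingface_hub import MCPClient

async def main() -> None:
    client = MCPClient(
        model="openai/gpt-oss-20b",
        provider="novita",
        api_key=os.environ["HF_TOKEN"],
    )
    await client.add_mcp_server(
        type="sse",
        url="https://gradio-docs-mcp.hf.space/gradio_api/mcp/sse",
        timeout=30,
    )
    async for chunk in client.process_single_turn_with_tools(
        [{"role": "user", "content": "What does gr.Interface do?"}]
    ):
        print(chunk)

asyncio.run(main())
```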
@@ -81,8 +89,7 @@ def to_llm_messages(history_msgs: List[Dict[str, Any]], user_msg: str) -> List[Dict[str, Any]]:
     with a system message prepended and the new user message appended.
     """
     msgs: List[Dict[str, Any]] = [{"role": "system", "content": SYSTEM_PROMPT}]
-
-    for m in history_msgs:
+    for m in history_msgs or []:
         role = m.get("role")
         content = m.get("content")
         if role in ("user", "assistant") and isinstance(content, str):
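Concretely, given the docstring, the shaping should behave as follows (hypothetical history values; the tail of the function that appends `user_msg` sits outside this hunk):

```python
# Sketch of the expected shaping, per the docstring: system prompt first,
# prior user/assistant turns kept, the new user message appended last.
history = [
    {"role": "user", "content": "What is gr.Blocks?"},
    {"role": "assistant", "content": "gr.Blocks is a low-level API..."},
]
assert to_llm_messages(history, "How do I stream?") == [
    {"role": "system", "content": SYSTEM_PROMPT},
    {"role": "user", "content": "What is gr.Blocks?"},
    {"role": "assistant", "content": "gr.Blocks is a low-level API..."},
    {"role": "user", "content": "How do I stream?"},
]
```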
@@ -98,17 +105,16 @@ async def stream_answer(messages_for_llm: List[Dict[str, Any]]) -> Iterable[str]
     await ensure_init()
     client = get_mcp_client()
 
-    #
+    # Pre-flight checks
     if not HF_TOKEN:
         yield (
-            "⚠️ Missing token: set `
-            "in your Space **Settings → Secrets
+            "⚠️ Missing token: set `HF_TOKEN` (or `HUGGING_FACE_HUB_TOKEN`) "
+            "in your Space **Settings → Secrets**. The token must allow provider='novita'."
         )
         return
 
     try:
         async for chunk in client.process_single_turn_with_tools(messages_for_llm):
-            # chunk is a dict describing text deltas and tool activity
             if isinstance(chunk, dict):
                 ctype = chunk.get("type")
                 if ctype == "tool_log":
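The per-request token check above surfaces the problem inside the chat UI, which is friendly on Spaces. A stricter fail-fast alternative (not in this commit) would validate once at startup:

```python
# Sketch: fail fast at import time instead of per-request (alternative design).
if not HF_TOKEN:
    raise RuntimeError(
        "Set HF_TOKEN (or HUGGING_FACE_HUB_TOKEN) in Space Settings → Secrets; "
        "the token must allow provider='novita'."
    )
```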
@@ -124,16 +130,22 @@ async def stream_answer(messages_for_llm: List[Dict[str, Any]]) -> Iterable[str]
                 if isinstance(content, str) and content.strip():
                     yield f"\n\n**Result:**\n{content}"
             else:
-                # Fallback if provider yields plain strings
                 yield str(chunk)
     except Exception as e:
         msg = str(e)
+        # Common failure modes
         if "401" in msg or "Unauthorized" in msg:
             yield (
-                "❌ Unauthorized (401). Your model call was rejected.\n\n"
-                "- Ensure the Space secret `
-                "- Confirm
-
+                "❌ Unauthorized (401). Your model call was rejected by Novita.\n\n"
+                "- Ensure the Space secret `HF_TOKEN` is set and valid.\n"
+                "- Confirm `HF_TOKEN` has access to provider='novita' and the model.\n"
+            )
+        elif "400" in msg or "Bad Request" in msg:
+            yield (
+                "❌ Bad Request (400). The Novita endpoint rejected the request.\n\n"
+                "- Double-check `CHAT_MODEL` (currently "
+                f"`{MODEL_ID}`) is valid for provider='novita'.\n"
+                "- Ensure your `HF_TOKEN` has the necessary permissions.\n"
             )
         else:
             yield f"❌ Error: {msg}"
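Substring-matching `"401"` and `"400"` in the exception text works but is brittle. `huggingface_hub` defines `HfHubHTTPError`, which carries the HTTP response, so the status code can be branched on directly; a sketch of that variant, assuming the failing provider call surfaces that exception type:

```python
# Sketch: branch on the HTTP status code instead of exception substrings.
# Assumes the failing call raises huggingface_hub's HfHubHTTPError.
from huggingface_hub.errors import HfHubHTTPError

try:
    ...  # the process_single_turn_with_tools(...) loop from above
except HfHubHTTPError as e:
    status = e.response.status_code if e.response is not None else None
    if status == 401:
        print("Unauthorized: check HF_TOKEN")  # or yield a chat message
    elif status == 400:
        print(f"Bad request: check CHAT_MODEL {MODEL_ID!r}")
    else:
        raise
```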
@@ -144,15 +156,14 @@ async def stream_answer(messages_for_llm: List[Dict[str, Any]]) -> Iterable[str]
 # ----------------------------
 with gr.Blocks(fill_height=True) as demo:
     gr.Markdown(
-        "# 🤖 Gradio Docs Chat (MCP Client)\n"
+        "# 🤖 Gradio Docs Chat (MCP Client via Novita)\n"
         "Ask anything about **Gradio**. Answers are grounded in the official docs via MCP."
     )
 
-    # Using the new messages format
     chat = gr.Chatbot(
         label="Gradio Docs Assistant",
         height=520,
-        type="messages",  # expects
+        type="messages",  # expects: [{"role": "...", "content": "..."}]
     )
 
     with gr.Row():
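With `type="messages"`, the Chatbot renders the same role/content dicts that the LLM side consumes, so no tuple conversion is needed anywhere. For example, a two-bubble history is simply:

```python
# The history shape gr.Chatbot(type="messages") renders directly:
history = [
    {"role": "user", "content": "How do I add streaming?"},
    {"role": "assistant", "content": "Yield partial updates from your handler."},
]
```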
@@ -173,24 +184,19 @@ with gr.Blocks(fill_height=True) as demo:
         """
         history_msgs is a list of {"role": ..., "content": ...} dicts.
         We append the user's message, then stream the assistant reply by
-        updating
+        updating the last assistant message content.
         """
         history_msgs = (history_msgs or []) + [{"role": "user", "content": user_msg}]
-        # Add a placeholder assistant message to stream into
         history_msgs.append({"role": "assistant", "content": ""})
         yield history_msgs
 
-        # Build LLM messages and stream chunks
         messages_for_llm = to_llm_messages(history_msgs[:-1], user_msg)
         async for delta in stream_answer(messages_for_llm):
-            # Append the delta to the last assistant message
            history_msgs[-1]["content"] += delta
             yield history_msgs
 
-    # Wire both Enter and button click
     msg.submit(on_submit, inputs=[msg, chat], outputs=chat, queue=True)
     send_btn.click(on_submit, inputs=[msg, chat], outputs=chat, queue=True)
 
-# For local dev; on Spaces, Gradio calls launch automatically.
 if __name__ == "__main__":
     demo.launch()
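A common follow-up to this wiring is clearing the textbox after each send; Gradio event listeners chain with `.then(...)`. A sketch of that variant (not part of this commit):

```python
# Sketch: chain a second step that clears the textbox after each submit.
msg.submit(on_submit, inputs=[msg, chat], outputs=chat, queue=True).then(
    lambda: "", inputs=None, outputs=msg
)
send_btn.click(on_submit, inputs=[msg, chat], outputs=chat, queue=True).then(
    lambda: "", inputs=None, outputs=msg
)
```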