sudipta26889 committed on
Commit
6d7a07a
·
verified ·
1 Parent(s): 51bd84d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +196 -96
app.py CHANGED
@@ -1,15 +1,25 @@
1
  # app.py
2
  # Hugging Face Space: Gradio app that chats about Gradio docs via the Gradio Docs MCP server.
 
 
 
 
 
 
3
  # Requirements:
4
  # - gradio
5
  # - huggingface_hub
6
  #
7
- # Space secret needed:
8
- # - HUGGING_FACE_HUB_TOKEN or HF_TOKEN (must have access to provider="novita")
 
 
 
 
9
 
10
  import os
11
  import asyncio
12
- from typing import Any, Dict, Iterable, List, Optional
13
 
14
  import gradio as gr
15
  from huggingface_hub import MCPClient
@@ -22,73 +32,71 @@ GRADIO_DOCS_MCP_SSE = os.environ.get(
22
  "https://gradio-docs-mcp.hf.space/gradio_api/mcp/sse",
23
  )
24
 
25
- # Use Novita provider + the model you specified
26
- MODEL_ID = os.environ.get("CHAT_MODEL", "openai/gpt-oss-20b")
27
- PROVIDER = os.environ.get("CHAT_PROVIDER", "novita") # <-- IMPORTANT
28
- # Accept either env name
29
  HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")
30
 
31
- SYSTEM_PROMPT = (
32
- "You are a helpful assistant that answers questions strictly using the Gradio documentation "
33
  "via the MCP tools provided by the Gradio Docs MCP server. Prefer the latest docs. "
34
- "When helpful, cite classes/functions (e.g., gr.Interface) and include short code examples."
35
  )
36
 
 
 
 
37
  # ----------------------------
38
- # MCP Client (lazy init)
39
  # ----------------------------
40
  mcp_client: Optional[MCPClient] = None
41
  _initialized = False
42
  _init_lock = asyncio.Lock()
43
 
 
 
 
 
44
 
45
- def get_mcp_client() -> MCPClient:
46
- """
47
- Create a single global MCPClient configured to use provider='novita'
48
- and the given MODEL_ID. MCPClient internally uses huggingface_hub's
49
- InferenceClient, so this matches your direct-Novita snippet.
50
- """
 
51
  global mcp_client
52
  if mcp_client is None:
53
- mcp_client = MCPClient(
54
- model=MODEL_ID,
55
- provider=PROVIDER, # novita
56
- api_key=HF_TOKEN, # token must permit novita access
57
- )
58
  return mcp_client
59
 
60
-
61
- async def ensure_init():
62
- """
63
- Lazily attach the Gradio Docs MCP server. In some huggingface_hub versions
64
- add_mcp_server is async, so it must be awaited exactly once.
65
- """
66
  global _initialized
67
  if _initialized:
68
  return
69
-
70
  async with _init_lock:
71
  if _initialized:
72
  return
73
- client = get_mcp_client()
74
- # Await the coroutine to avoid "was never awaited" warnings
75
  await client.add_mcp_server(
76
  type="sse",
77
  url=GRADIO_DOCS_MCP_SSE,
78
- timeout=30,
79
  )
80
  _initialized = True
81
 
82
-
83
  # ----------------------------
84
- # Helpers for messages
85
  # ----------------------------
86
- def to_llm_messages(history_msgs: List[Dict[str, Any]], user_msg: str) -> List[Dict[str, Any]]:
87
  """
88
  Convert Chatbot messages list (role/content dicts) to the LLM format,
89
  with a system message prepended and the new user message appended.
90
  """
91
- msgs: List[Dict[str, Any]] = [{"role": "system", "content": SYSTEM_PROMPT}]
92
  for m in history_msgs or []:
93
  role = m.get("role")
94
  content = m.get("content")
@@ -97,106 +105,198 @@ def to_llm_messages(history_msgs: List[Dict[str, Any]], user_msg: str) -> List[D
97
  msgs.append({"role": "user", "content": user_msg})
98
  return msgs
99
 
 
 
 
 
 
 
 
 
 
 
 
 
100
 
101
- async def stream_answer(messages_for_llm: List[Dict[str, Any]]) -> Iterable[str]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  """
103
  Stream deltas and tool logs from MCPClient.process_single_turn_with_tools.
 
104
  """
105
- await ensure_init()
106
- client = get_mcp_client()
107
-
108
- # Pre-flight checks
109
- if not HF_TOKEN:
110
- yield (
111
- "⚠️ Missing token: set `HF_TOKEN` (or `HUGGING_FACE_HUB_TOKEN`) "
112
- "in your Space **Settings → Secrets**. The token must allow provider='novita'."
113
- )
 
 
 
 
 
 
 
114
  return
115
 
116
  try:
117
  async for chunk in client.process_single_turn_with_tools(messages_for_llm):
118
  if isinstance(chunk, dict):
119
  ctype = chunk.get("type")
 
120
  if ctype == "tool_log":
 
121
  name = chunk.get("tool", "tool")
122
  status = chunk.get("status", "")
123
- yield f"\n\n_(using **{name}** {status})_"
 
 
124
  elif ctype == "text_delta":
125
- yield chunk.get("delta", "")
 
126
  elif ctype == "text":
127
- yield chunk.get("text", "")
 
128
  elif ctype == "tool_result":
 
 
129
  content = chunk.get("content")
130
- if isinstance(content, str) and content.strip():
131
- yield f"\n\n**Result:**\n{content}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
  else:
133
- yield str(chunk)
 
 
134
  except Exception as e:
135
  msg = str(e)
136
- # Common failure modes
137
  if "401" in msg or "Unauthorized" in msg:
138
- yield (
139
- "❌ Unauthorized (401). Your model call was rejected by Novita.\n\n"
140
- "- Ensure the Space secret `HF_TOKEN` is set and valid.\n"
141
- "- Confirm `HF_TOKEN` has access to provider='novita' and the model.\n"
142
- )
143
- elif "400" in msg or "Bad Request" in msg:
144
- yield (
145
- "❌ Bad Request (400). The Novita endpoint rejected the request.\n\n"
146
- "- Double-check `CHAT_MODEL` (currently "
147
- f"`{MODEL_ID}`) is valid for provider='novita'.\n"
148
- "- Ensure your `HF_TOKEN` has the necessary permissions.\n"
149
  )
150
  else:
151
- yield f"❌ Error: {msg}"
152
-
153
 
154
  # ----------------------------
155
  # Gradio UI
156
  # ----------------------------
157
  with gr.Blocks(fill_height=True) as demo:
158
  gr.Markdown(
159
- "# 🤖 Gradio Docs Chat (MCP Client via Novita)\n"
160
- "Ask anything about **Gradio**. Answers are grounded in the official docs via MCP."
161
- )
162
-
163
- chat = gr.Chatbot(
164
- label="Gradio Docs Assistant",
165
- height=520,
166
- type="messages", # expects: [{"role": "...", "content": "..."}]
167
  )
168
 
169
  with gr.Row():
170
- msg = gr.Textbox(
171
- placeholder="e.g., How do I use gr.Interface with multiple inputs?",
172
- scale=9,
173
- autofocus=True,
174
- )
175
- send_btn = gr.Button("Send", scale=1, variant="primary")
 
 
 
 
 
 
 
176
 
177
- with gr.Row():
178
- clear = gr.ClearButton(components=[chat, msg], value="Clear")
179
- info = gr.Markdown(
180
- f"**Model:** `{MODEL_ID}` · **Provider:** `{PROVIDER}` · **MCP:** Gradio Docs SSE",
181
- )
182
 
183
- async def on_submit(user_msg: str, history_msgs: List[Dict[str, Any]]):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
184
  """
185
- history_msgs is a list of {"role": ..., "content": ...} dicts.
186
- We append the user's message, then stream the assistant reply by
187
- updating the last assistant message content.
 
188
  """
 
189
  history_msgs = (history_msgs or []) + [{"role": "user", "content": user_msg}]
190
  history_msgs.append({"role": "assistant", "content": ""})
191
- yield history_msgs
 
 
 
192
 
193
- messages_for_llm = to_llm_messages(history_msgs[:-1], user_msg)
194
- async for delta in stream_answer(messages_for_llm):
195
- history_msgs[-1]["content"] += delta
196
- yield history_msgs
 
197
 
198
- msg.submit(on_submit, inputs=[msg, chat], outputs=chat, queue=True)
199
- send_btn.click(on_submit, inputs=[msg, chat], outputs=chat, queue=True)
 
200
 
 
201
  if __name__ == "__main__":
202
  demo.launch()
 
1
  # app.py
2
  # Hugging Face Space: Gradio app that chats about Gradio docs via the Gradio Docs MCP server.
3
+ # Features:
4
+ # • MCP tool-calling (connects to the official Gradio Docs MCP SSE server)
5
+ # • Streaming responses with live tool logs
6
+ # • Optional "Concise / Detailed" answer style
7
+ # • Lightweight citations panel (summarizes MCP tool hits)
8
+ #
9
  # Requirements:
10
  # - gradio
11
  # - huggingface_hub
12
  #
13
+ # Space secrets needed:
14
+ # - HUGGING_FACE_HUB_TOKEN or HF_TOKEN (token must allow using the selected provider/model)
15
+ #
16
+ # Notes:
17
+ # - The default model/provider below are known to work with chat + tool calling via the HF router.
18
+ # - You can override via Space Secrets: CHAT_MODEL, CHAT_PROVIDER.
19
 
20
  import os
21
  import asyncio
22
+ from typing import Any, Dict, Iterable, List, Optional, Tuple
23
 
24
  import gradio as gr
25
  from huggingface_hub import MCPClient
 
32
  "https://gradio-docs-mcp.hf.space/gradio_api/mcp/sse",
33
  )
34
 
# Chat model and inference provider. The defaults can be overridden through the
# CHAT_MODEL / CHAT_PROVIDER environment variables (e.g. Space secrets).
MODEL_ID = os.environ.get("CHAT_MODEL", "Qwen/Qwen2.5-7B-Instruct")
PROVIDER = os.environ.get("CHAT_PROVIDER", "hf-inference")  # router
# Either variable name is accepted; a non-empty HF_TOKEN takes precedence.
HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")

# Shared part of the system prompt; one of the two style suffixes below is
# appended to it for every request.
BASE_SYSTEM_PROMPT = (
    "You are a helpful assistant that answers strictly using the Gradio documentation "
    "via the MCP tools provided by the Gradio Docs MCP server. Prefer the latest docs. "
    "Cite relevant class/function names (e.g., gr.Interface) and include short code examples when helpful."
)

CONCISE_SUFFIX = " Keep answers concise (3–6 sentences) unless code is necessary."
DETAILED_SUFFIX = " Provide a detailed, step-by-step answer with short code where helpful."

# ----------------------------
# MCP Client (lazy init, reconfigurable)
# ----------------------------
# Process-wide client, created on first use by get_mcp_client().
mcp_client: Optional[MCPClient] = None
# True once the Gradio Docs MCP server has been attached to ``mcp_client``.
_initialized = False
# Serialises the one-time server attachment across concurrent requests.
_init_lock = asyncio.Lock()
56
 
def _current_system_prompt(style: str) -> str:
    """Return the system prompt for the requested answer style.

    ``"Concise"`` selects ``CONCISE_SUFFIX``; every other value (including the
    UI default ``"Detailed"``) selects ``DETAILED_SUFFIX``.
    """
    suffix = CONCISE_SUFFIX if style == "Concise" else DETAILED_SUFFIX
    return BASE_SYSTEM_PROMPT + suffix
61
 
def _reset_client():
    """Forget the cached client so the next request builds a fresh one.

    Clears both module-level pieces of state: the client itself and the flag
    recording that the MCP server was attached. The previous client object is
    only dropped here, not explicitly closed.
    """
    global mcp_client, _initialized
    mcp_client = None
    _initialized = False
67
+
def get_mcp_client(model_id: str, provider: str, api_key: Optional[str]) -> MCPClient:
    """Return the shared ``MCPClient``, creating it on first use.

    The arguments are only used when no client is cached yet. Once a client
    exists it is returned as-is, so different ``model_id`` / ``provider`` /
    ``api_key`` values take effect only after ``_reset_client()`` has run.
    """
    global mcp_client
    if mcp_client is None:
        mcp_client = MCPClient(model=model_id, provider=provider, api_key=api_key)
    return mcp_client
73
 
async def ensure_init(model_id: str, provider: str, api_key: Optional[str]):
    """Attach the Gradio Docs MCP server to the shared client exactly once.

    The ``_initialized`` flag is checked before and again after acquiring
    ``_init_lock`` so that concurrent first requests register the server only
    once. The flag is set only after ``add_mcp_server`` returns, so a failed
    attempt is retried on the next call.
    """
    global _initialized
    if _initialized:
        return

    async with _init_lock:
        # Another task may have finished the setup while we waited for the lock.
        if _initialized:
            return
        client = get_mcp_client(model_id, provider, api_key)
        # add_mcp_server may be a coroutine; await to avoid warnings
        await client.add_mcp_server(
            type="sse",
            url=GRADIO_DOCS_MCP_SSE,
            timeout=45,
        )
        _initialized = True
90
 
 
91
  # ----------------------------
92
+ # Message helpers
93
  # ----------------------------
94
+ def to_llm_messages(history_msgs: List[Dict[str, Any]], user_msg: str, style: str) -> List[Dict[str, Any]]:
95
  """
96
  Convert Chatbot messages list (role/content dicts) to the LLM format,
97
  with a system message prepended and the new user message appended.
98
  """
99
+ msgs: List[Dict[str, Any]] = [{"role": "system", "content": _current_system_prompt(style)}]
100
  for m in history_msgs or []:
101
  role = m.get("role")
102
  content = m.get("content")
 
105
  msgs.append({"role": "user", "content": user_msg})
106
  return msgs
107
 
108
+ # ----------------------------
109
+ # Streaming + side-panels (tool logs & citations)
110
+ # ----------------------------
def _append_log(log_lines: List[str], line: str, max_lines: int = 200) -> None:
    """Append ``line`` to ``log_lines`` in place, keeping at most ``max_lines``.

    When the list grows past ``max_lines`` the oldest entries are removed from
    the front so only the newest ones remain.
    """
    log_lines.append(line)
    overflow = len(log_lines) - max_lines
    if overflow > 0:
        del log_lines[:overflow]
115
+
def _format_tool_log(log_lines: List[str]) -> str:
    """Render the tool-activity lines as Markdown, one entry per line.

    An empty log yields a placeholder message instead of an empty string.
    """
    if not log_lines:
        return "_No tool activity yet._"
    return "\n".join(log_lines)
120
 
def _format_citations(cites: List[Tuple[str, Optional[str]]]) -> str:
    """Render the most recent citations as a Markdown bullet list.

    ``cites`` holds ``(label, url)`` pairs. Only the last 12 are shown. The
    URL, when present, follows the bold label after an em dash. An empty list
    yields a placeholder message.
    """
    if not cites:
        return "_No citations captured yet._"
    # Show up to the last 12 citations (tool name + optional URL/source)
    return "\n".join(
        f"- **{label}** — {url}" if url else f"- **{label}**"
        for label, url in cites[-12:]
    )
133
+
async def stream_answer(
    messages_for_llm: List[Dict[str, Any]],
    model_id: str,
    provider: str,
    api_key: Optional[str],
) -> Iterable[Dict[str, Any]]:
    """Stream one assistant turn as a sequence of UI updates.

    Runs ``MCPClient.process_single_turn_with_tools`` on ``messages_for_llm``
    and yields one dict per update with three keys:

    - ``"delta"``: text to append to the assistant message (may be empty),
    - ``"tool_log"``: Markdown for the tool-activity panel,
    - ``"citations"``: Markdown for the citations panel.

    This is an async generator and must be consumed with ``async for``.
    Failures (missing token, connection or model errors) are reported as a
    final ``"delta"`` message instead of being raised.
    """
    import re  # local import: only needed for the URL heuristic below

    tool_log: List[str] = []
    citations: List[Tuple[str, Optional[str]]] = []  # (label, url)

    def _update(delta: str = "") -> Dict[str, Any]:
        """Bundle a text delta with the current state of both side panels."""
        return {
            "delta": delta,
            "tool_log": _format_tool_log(tool_log),
            "citations": _format_citations(citations),
        }

    def _record_tool_result(tool_name: str, content: Any) -> str:
        """Log a tool result, capture a citation, and return the chat snippet."""
        label, url = tool_name, None
        if isinstance(content, dict):
            # Some servers return {"url": "...", "title": "...", ...}
            url = content.get("url") or content.get("link")
            label = content.get("title") or content.get("name") or tool_name
        elif isinstance(content, str):
            # Light heuristic (not a strict URL parse): first http(s) link.
            match = re.search(r"https?://[^\s<>()\[\]]+", content)
            if match:
                url = match.group(0).rstrip(".,;:'\"")[:200]

        citations.append((label, url))
        _append_log(tool_log, f" • {tool_name} returned result")

        # Echo a truncated "Result" block into the chat for transparency, but
        # only when there is text to show.
        if not isinstance(content, str):
            return ""
        snippet = content.strip()
        if not snippet:
            return ""
        if len(snippet) > 700:
            snippet = snippet[:700] + "…"
        return f"\n\n**Result (from {tool_name}):**\n{snippet}"

    # Check the token before building a client or contacting the MCP server.
    if not api_key:
        yield _update(
            "⚠️ Missing token: set `HUGGING_FACE_HUB_TOKEN` or `HF_TOKEN` "
            "in your Space **Settings → Secrets** to authenticate model calls."
        )
        return

    try:
        # Inside the try so connection failures reach the error path below.
        await ensure_init(model_id, provider, api_key)
        client = get_mcp_client(model_id, provider, api_key)

        async for chunk in client.process_single_turn_with_tools(messages_for_llm):
            ctype = chunk.get("type") if isinstance(chunk, dict) else None

            if ctype == "tool_log":
                # Example: {"type": "tool_log", "tool": "gradio_docs_mcp_search_gradio_docs", "status": "started/finished"}
                name = chunk.get("tool", "tool")
                status = chunk.get("status", "")
                _append_log(tool_log, f"- {name} **{status}**")
                yield _update()

            elif ctype == "text_delta":
                yield _update(chunk.get("delta", ""))

            elif ctype == "text":
                yield _update(chunk.get("text", ""))

            elif ctype == "tool_result":
                yield _update(
                    _record_tool_result(chunk.get("tool", "tool"), chunk.get("content"))
                )

            # NOTE(review): huggingface_hub documents this generator as yielding
            # chat-completion stream chunks and tool-result message objects
            # rather than {"type": ...} dicts; the two duck-typed branches
            # below cover that shape. Confirm against the installed version.
            elif hasattr(chunk, "choices"):
                choices = chunk.choices or []
                delta_obj = getattr(choices[0], "delta", None) if choices else None
                text = getattr(delta_obj, "content", None)
                if text:
                    yield _update(text)

            elif getattr(chunk, "role", None) == "tool":
                yield _update(
                    _record_tool_result(
                        getattr(chunk, "name", None) or "tool",
                        getattr(chunk, "content", None),
                    )
                )

            elif not isinstance(chunk, dict):
                # Fallback if provider yields plain strings
                yield _update(str(chunk))

    except Exception as e:  # UI boundary: report the failure in the chat
        msg = str(e)
        if "401" in msg or "Unauthorized" in msg:
            err = (
                "❌ Unauthorized (401). Ensure your token is set and permitted for the chosen model/provider.\n"
                f"- Model: `{model_id}`\n- Provider: `{provider}`\n"
            )
        else:
            err = f"❌ Error: {msg}"
        yield _update(err)
230
 
231
  # ----------------------------
232
  # Gradio UI
233
  # ----------------------------
with gr.Blocks(fill_height=True) as demo:
    gr.Markdown(
        "# 🤖 Gradio Docs Chat (MCP Client)\n"
        "Ask anything about **Gradio**. Answers are grounded in the official docs via MCP tools."
    )

    with gr.Row():
        # Left column: the conversation and its input controls.
        with gr.Column(scale=7):
            chat = gr.Chatbot(
                label="Gradio Docs Assistant",
                height=520,
                type="messages",  # expects: [{"role": "...", "content": "..."}]
            )
            with gr.Row():
                msg = gr.Textbox(
                    placeholder="e.g., How do I use gr.Interface with multiple inputs?",
                    scale=9,
                    autofocus=True,
                )
                send_btn = gr.Button("Send", scale=1, variant="primary")

            clear = gr.ClearButton(components=[chat, msg], value="Clear")

        # Right column: settings plus the live tool-activity and citation panels.
        with gr.Column(scale=5):
            with gr.Accordion("⚙️ Settings", open=False):
                style = gr.Radio(
                    label="Answer Style",
                    choices=["Concise", "Detailed"],
                    value="Detailed",
                )
                # Two trailing spaces before "\n" give a Markdown hard line break.
                model_read = gr.Markdown(
                    f"**Model:** `{MODEL_ID}`  \n**Provider:** `{PROVIDER}`  \n"
                    "_(override via Space Secrets: `CHAT_MODEL`, `CHAT_PROVIDER`)_"
                )

            with gr.Accordion("🛠 Tool Activity (live)", open=True):
                tool_log_md = gr.Markdown("_No tool activity yet._")

            with gr.Accordion("📎 Citations (recent)", open=True):
                citations_md = gr.Markdown("_No citations captured yet._")

    async def on_submit(user_msg: str, history_msgs: List[Dict[str, Any]], style_choice: str):
        """Handle one user submission and stream the reply into the UI.

        Yields ``(chat_history, tool_log_update, citations_update)`` tuples
        matching the three outputs wired below:

        - chat text
        - tool activity
        - citations

        A blank ``user_msg`` leaves all three outputs unchanged.
        """
        prior_history = list(history_msgs or [])

        # Nothing to ask: do not call the model with an empty question.
        if not (user_msg or "").strip():
            yield prior_history, gr.update(), gr.update()
            return

        # Start a new assistant message for streaming
        history_msgs = prior_history + [
            {"role": "user", "content": user_msg},
            {"role": "assistant", "content": ""},
        ]
        yield (
            history_msgs,
            gr.update(value="_No tool activity yet._"),
            gr.update(value="_No citations captured yet._"),
        )

        # to_llm_messages appends user_msg itself, so pass only the earlier
        # turns; passing the new user turn as well would send it twice.
        messages_for_llm = to_llm_messages(prior_history, user_msg, style_choice)

        async for chunk in stream_answer(messages_for_llm, MODEL_ID, PROVIDER, HF_TOKEN):
            delta = chunk.get("delta", "")
            if delta:
                history_msgs[-1]["content"] += delta
            yield (
                history_msgs,
                gr.update(value=chunk.get("tool_log", "")),
                gr.update(value=chunk.get("citations", "")),
            )

    # Wire both Enter and button click; also pass "style"
    msg.submit(on_submit, inputs=[msg, chat, style], outputs=[chat, tool_log_md, citations_md], queue=True)
    send_btn.click(on_submit, inputs=[msg, chat, style], outputs=[chat, tool_log_md, citations_md], queue=True)

# Launch the app when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()