# app.py
# Hugging Face Space: Gradio app that chats about Gradio docs via the Gradio Docs MCP server.
# Features:
#   • MCP tool-calling (connects to the official Gradio Docs MCP SSE server)
#   • Streaming responses with live tool logs
#   • Optional "Concise / Detailed" answer style
#   • Lightweight citations panel (summarizes MCP tool hits)
#
# Requirements (in requirements.txt):
#   gradio>=5.0.0  (the ssr_mode flag passed to launch() below requires Gradio 5)
#   huggingface_hub>=0.24.0
#
# Space secrets needed:
#   - HUGGING_FACE_HUB_TOKEN or HF_TOKEN  (token must allow using the selected provider/model)
#
# Notes:
#   - The default model/provider below are known to work with chat + tool calling via the HF router.
#   - You can override via Space Secrets: CHAT_MODEL, CHAT_PROVIDER, GRADIO_DOCS_MCP_SSE.

import os
import asyncio
from typing import Any, AsyncIterator, Dict, List, Optional, Tuple

import gradio as gr
from huggingface_hub import MCPClient

# ----------------------------
# Configuration
# ----------------------------
GRADIO_DOCS_MCP_SSE = os.environ.get(
    "GRADIO_DOCS_MCP_SSE",
    "https://gradio-docs-mcp.hf.space/gradio_api/mcp/sse",
)
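
# Any Gradio app launched with mcp_server=True serves MCP over SSE at this same
# /gradio_api/mcp/sse path, so pointing this client at a different docs Space
# only requires overriding GRADIO_DOCS_MCP_SSE.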

# Use a router model that supports OpenAI-style chat + tool calling.
MODEL_ID = os.environ.get("CHAT_MODEL", "Qwen/Qwen2.5-7B-Instruct")
PROVIDER = os.environ.get("CHAT_PROVIDER", "hf-inference")  # router
HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")

BASE_SYSTEM_PROMPT = (
    "You are a helpful assistant that answers strictly using the Gradio documentation "
    "via the MCP tools provided by the Gradio Docs MCP server. Prefer the latest docs. "
    "Cite relevant class/function names (e.g., gr.Interface) and include short code examples when helpful."
)
CONCISE_SUFFIX = " Keep answers concise (3–6 sentences) unless code is necessary."
DETAILED_SUFFIX = " Provide a detailed, step-by-step answer with short code where helpful."

# ----------------------------
# MCP Client (lazy init, reconfigurable)
# ----------------------------
mcp_client: Optional[MCPClient] = None
_initialized = False
_init_lock = asyncio.Lock()

def _current_system_prompt(style: str) -> str:
    return BASE_SYSTEM_PROMPT + (CONCISE_SUFFIX if style == "Concise" else DETAILED_SUFFIX)

def _reset_client():
    """Reset the global client so a new one is created with updated env (if any)."""
    global mcp_client, _initialized
    mcp_client = None
    _initialized = False

def get_mcp_client(model_id: str, provider: str, api_key: Optional[str]) -> MCPClient:
    global mcp_client
    if mcp_client is None:
        mcp_client = MCPClient(model=model_id, provider=provider, api_key=api_key)
    return mcp_client
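
# Design note: one lazily created MCPClient is shared across requests. Its
# first construction happens inside ensure_init's lock below, and Gradio
# handles requests on a single asyncio event loop, so no extra locking is
# needed here.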

async def ensure_init(model_id: str, provider: str, api_key: Optional[str]):
    """Attach the Gradio Docs MCP server exactly once."""
    global _initialized
    if _initialized:
        return
    async with _init_lock:
        if _initialized:
            return
        client = get_mcp_client(model_id, provider, api_key)
        # add_mcp_server is a coroutine; await it so the server is fully
        # attached before the first chat turn runs.
        await client.add_mcp_server(
            type="sse",
            url=GRADIO_DOCS_MCP_SSE,
            timeout=45,
        )
        _initialized = True

# ----------------------------
# Message helpers
# ----------------------------
def to_llm_messages(history_msgs: List[Dict[str, Any]], user_msg: str, style: str) -> List[Dict[str, Any]]:
    """
    Convert Chatbot messages list (role/content dicts) to the LLM format,
    with a system message prepended and the new user message appended.
    """
    msgs: List[Dict[str, Any]] = [{"role": "system", "content": _current_system_prompt(style)}]
    for m in history_msgs or []:
        role = m.get("role")
        content = m.get("content")
        if role in ("user", "assistant") and isinstance(content, str):
            msgs.append({"role": role, "content": content})
    msgs.append({"role": "user", "content": user_msg})
    return msgs
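
# Worked example: with history
#   [{"role": "user", "content": "Hi"}, {"role": "assistant", "content": "Hello!"}],
# user_msg "What is gr.Blocks?", and style "Concise", this returns:
#   [{"role": "system", "content": BASE_SYSTEM_PROMPT + CONCISE_SUFFIX},
#    {"role": "user", "content": "Hi"},
#    {"role": "assistant", "content": "Hello!"},
#    {"role": "user", "content": "What is gr.Blocks?"}]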

# ----------------------------
# Streaming + side-panels (tool logs & citations)
# ----------------------------
def _append_log(log_lines: List[str], line: str, max_lines: int = 200) -> None:
    log_lines.append(line)
    if len(log_lines) > max_lines:
        del log_lines[: len(log_lines) - max_lines]

def _format_tool_log(log_lines: List[str]) -> str:
    if not log_lines:
        return "_No tool activity yet._"
    return "\n".join(log_lines)

def _format_citations(cites: List[Tuple[str, Optional[str]]]) -> str:
    if not cites:
        return "_No citations captured yet._"
    # Show up to the last 12 citations (tool name + optional URL/source)
    last = cites[-12:]
    lines = []
    for (label, url) in last:
        if url:
            lines.append(f"- **{label}** β€” {url}")
        else:
            lines.append(f"- **{label}**")
    return "\n".join(lines)

async def stream_answer(
    messages_for_llm: List[Dict[str, Any]],
    model_id: str,
    provider: str,
    api_key: Optional[str],
) -> AsyncIterator[Dict[str, Any]]:
    """
    Stream deltas and tool logs from MCPClient.process_single_turn_with_tools.
    Yield dicts with partial text plus updated tool logs & citations so the UI can update three panels.
    """
    tool_log: List[str] = []
    citations: List[Tuple[str, Optional[str]]] = []  # (label, url)

    # Check the token before constructing the client, so a missing secret
    # fails fast with a clear hint instead of an opaque auth error later.
    if not api_key:
        yield {
            "delta": (
                "⚠️ Missing token: set `HUGGING_FACE_HUB_TOKEN` or `HF_TOKEN` "
                "in your Space **Settings → Secrets** to authenticate model calls."
            ),
            "tool_log": _format_tool_log(tool_log),
            "citations": _format_citations(citations),
        }
        return

    await ensure_init(model_id, provider, api_key)
    client = get_mcp_client(model_id, provider, api_key)

    try:
        async for chunk in client.process_single_turn_with_tools(messages_for_llm):
            if isinstance(chunk, dict):
                ctype = chunk.get("type")

                if ctype == "tool_log":
                    name = chunk.get("tool", "tool")
                    status = chunk.get("status", "")
                    _append_log(tool_log, f"- {name} **{status}**")
                    yield {"delta": "", "tool_log": _format_tool_log(tool_log), "citations": _format_citations(citations)}

                elif ctype == "text_delta":
                    yield {"delta": chunk.get("delta", ""), "tool_log": _format_tool_log(tool_log), "citations": _format_citations(citations)}

                elif ctype == "text":
                    yield {"delta": chunk.get("text", ""), "tool_log": _format_tool_log(tool_log), "citations": _format_citations(citations)}

                elif ctype == "tool_result":
                    tool_name = chunk.get("tool", "tool")
                    content = chunk.get("content")
                    url = None
                    if isinstance(content, dict):
                        url = content.get("url") or content.get("link")
                        title = content.get("title") or content.get("name")
                        label = title or tool_name
                    elif isinstance(content, str):
                        label = tool_name
                        if "http://" in content or "https://" in content:
                            start = content.find("http")
                            url = content[start : start + 200].split("\n")[0].strip()
                    else:
                        label = tool_name

                    citations.append((label, url))
                    _append_log(tool_log, f"  β€’ {tool_name} returned result")
                    snippet = ""
                    if isinstance(content, str):
                        snippet = content.strip()
                        if len(snippet) > 700:
                            snippet = snippet[:700] + "…"
                        snippet = f"\n\n**Result (from {tool_name}):**\n{snippet}"
                    yield {
                        "delta": snippet,
                        "tool_log": _format_tool_log(tool_log),
                        "citations": _format_citations(citations),
                    }

            else:
                yield {"delta": str(chunk), "tool_log": _format_tool_log(tool_log), "citations": _format_citations(citations)}

    except Exception as e:
        msg = str(e)
        if "401" in msg or "Unauthorized" in msg:
            err = (
                "❌ Unauthorized (401). Ensure your token is set and permitted for the chosen model/provider.\n"
                f"- Model: `{model_id}`\n- Provider: `{provider}`\n"
            )
        else:
            err = f"❌ Error: {msg}"
        yield {"delta": err, "tool_log": _format_tool_log(tool_log), "citations": _format_citations(citations)}

# ----------------------------
# Gradio UI
# ----------------------------
with gr.Blocks(fill_height=True) as demo:
    gr.Markdown(
        "# πŸ€– Gradio Docs Chat (MCP Client)\n"
        "Ask anything about **Gradio**. Answers are grounded in the official docs via MCP tools."
    )

    with gr.Row():
        with gr.Column(scale=7):
            chat = gr.Chatbot(
                label="Gradio Docs Assistant",
                height=520,
                type="messages",
            )
            with gr.Row():
                msg = gr.Textbox(
                    placeholder="e.g., How do I use gr.Interface with multiple inputs?",
                    scale=9,
                    autofocus=True,
                )
                send_btn = gr.Button("Send", scale=1, variant="primary")

            clear = gr.ClearButton(components=[chat, msg], value="Clear")

        with gr.Column(scale=5):
            with gr.Accordion("βš™οΈ Settings", open=False):
                style = gr.Radio(
                    label="Answer Style",
                    choices=["Concise", "Detailed"],
                    value="Detailed",
                )
                model_read = gr.Markdown(
                    f"**Model:** `{MODEL_ID}`  \n**Provider:** `{PROVIDER}`  \n"
                    "_(override via Space Secrets: `CHAT_MODEL`, `CHAT_PROVIDER`)_"
                )

            with gr.Accordion("πŸ›  Tool Activity (live)", open=True):
                tool_log_md = gr.Markdown("_No tool activity yet._")

            with gr.Accordion("πŸ“Ž Citations (recent)", open=True):
                citations_md = gr.Markdown("_No citations captured yet._")

    async def on_submit(user_msg: str, history_msgs: List[Dict[str, Any]], style_choice: str):
        """
        Append the user's message, then stream the assistant reply while updating:
          - chat text
          - tool activity
          - citations
        """
        if not (user_msg or "").strip():
            yield history_msgs or [], gr.update(), gr.update()
            return
        history_msgs = (history_msgs or []) + [{"role": "user", "content": user_msg}]
        history_msgs.append({"role": "assistant", "content": ""})
        yield history_msgs, gr.update(value="_No tool activity yet._"), gr.update(value="_No citations captured yet._")

        # history_msgs[:-2] drops the just-added user message and the empty
        # assistant placeholder; to_llm_messages re-appends the user message.
        messages_for_llm = to_llm_messages(history_msgs[:-2], user_msg, style_choice)

        async for chunk in stream_answer(messages_for_llm, MODEL_ID, PROVIDER, HF_TOKEN):
            delta = chunk.get("delta", "")
            if delta:
                history_msgs[-1]["content"] += delta
            yield history_msgs, gr.update(value=chunk.get("tool_log", "")), gr.update(value=chunk.get("citations", ""))

    msg.submit(on_submit, inputs=[msg, chat, style], outputs=[chat, tool_log_md, citations_md], queue=True)
    send_btn.click(on_submit, inputs=[msg, chat, style], outputs=[chat, tool_log_md, citations_md], queue=True)
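
    # Optional tweak (a sketch, not enabled here): clear the textbox after a
    # reply finishes by chaining a follow-up step onto either listener, e.g.:
    #   msg.submit(on_submit, ...).then(lambda: "", None, msg)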

# ----------------------------
# Gradio runtime (queue + launch)
# ----------------------------
# IMPORTANT: assign the queued app back to 'demo' for older Gradio versions.
demo = demo.queue(max_size=32)

# Always launch; Spaces runs this script directly.
demo.launch(
    ssr_mode=False,       # disable SSR to avoid Node helper churn
    server_name="0.0.0.0",
    server_port=7860,
)