Spaces:

sudipta26889
/

gradio-doc

Sleeping

App Files Files Community

sudipta26889 committed 17 days ago

Commit

6d7a07a

verified ·

1 Parent(s): 51bd84d

Update app.py

Browse files

Files changed (1) hide show

app.py +196 -96

app.py CHANGED Viewed

@@ -1,15 +1,25 @@
 # app.py
 # Hugging Face Space: Gradio app that chats about Gradio docs via the Gradio Docs MCP server.
 # Requirements:
 #   - gradio
 #   - huggingface_hub
 #
-# Space secret needed:
-#   - HUGGING_FACE_HUB_TOKEN or HF_TOKEN  (must have access to provider="novita")
 import os
 import asyncio
-from typing import Any, Dict, Iterable, List, Optional
 import gradio as gr
 from huggingface_hub import MCPClient
@@ -22,73 +32,71 @@ GRADIO_DOCS_MCP_SSE = os.environ.get(
     "https://gradio-docs-mcp.hf.space/gradio_api/mcp/sse",
 )
-# Use Novita provider + the model you specified
-MODEL_ID = os.environ.get("CHAT_MODEL", "openai/gpt-oss-20b")
-PROVIDER = os.environ.get("CHAT_PROVIDER", "novita")  # <-- IMPORTANT
-# Accept either env name
 HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")
-SYSTEM_PROMPT = (
-    "You are a helpful assistant that answers questions strictly using the Gradio documentation "
     "via the MCP tools provided by the Gradio Docs MCP server. Prefer the latest docs. "
-    "When helpful, cite classes/functions (e.g., gr.Interface) and include short code examples."
 )
 # ----------------------------
-# MCP Client (lazy init)
 # ----------------------------
 mcp_client: Optional[MCPClient] = None
 _initialized = False
 _init_lock = asyncio.Lock()
-def get_mcp_client() -> MCPClient:
-    """
-    Create a single global MCPClient configured to use provider='novita'
-    and the given MODEL_ID. MCPClient internally uses huggingface_hub's
-    InferenceClient, so this matches your direct-Novita snippet.
-    """
     global mcp_client
     if mcp_client is None:
-        mcp_client = MCPClient(
-            model=MODEL_ID,
-            provider=PROVIDER,  # novita
-            api_key=HF_TOKEN,   # token must permit novita access
-        )
     return mcp_client
-async def ensure_init():
-    """
-    Lazily attach the Gradio Docs MCP server. In some huggingface_hub versions
-    add_mcp_server is async, so it must be awaited exactly once.
-    """
     global _initialized
     if _initialized:
         return
     async with _init_lock:
         if _initialized:
             return
-        client = get_mcp_client()
-        # Await the coroutine to avoid "was never awaited" warnings
         await client.add_mcp_server(
             type="sse",
             url=GRADIO_DOCS_MCP_SSE,
-            timeout=30,
         )
         _initialized = True
 # ----------------------------
-# Helpers for messages
 # ----------------------------
-def to_llm_messages(history_msgs: List[Dict[str, Any]], user_msg: str) -> List[Dict[str, Any]]:
     """
     Convert Chatbot messages list (role/content dicts) to the LLM format,
     with a system message prepended and the new user message appended.
     """
-    msgs: List[Dict[str, Any]] = [{"role": "system", "content": SYSTEM_PROMPT}]
     for m in history_msgs or []:
         role = m.get("role")
         content = m.get("content")
@@ -97,106 +105,198 @@ def to_llm_messages(history_msgs: List[Dict[str, Any]], user_msg: str) -> List[D
     msgs.append({"role": "user", "content": user_msg})
     return msgs
-async def stream_answer(messages_for_llm: List[Dict[str, Any]]) -> Iterable[str]:
     """
     Stream deltas and tool logs from MCPClient.process_single_turn_with_tools.
     """
-    await ensure_init()
-    client = get_mcp_client()
-    # Pre-flight checks
-    if not HF_TOKEN:
-        yield (
-            "⚠️ Missing token: set `HF_TOKEN` (or `HUGGING_FACE_HUB_TOKEN`) "
-            "in your Space **Settings → Secrets**. The token must allow provider='novita'."
-        )
         return
     try:
         async for chunk in client.process_single_turn_with_tools(messages_for_llm):
             if isinstance(chunk, dict):
                 ctype = chunk.get("type")
                 if ctype == "tool_log":
                     name = chunk.get("tool", "tool")
                     status = chunk.get("status", "")
-                    yield f"\n\n_(using **{name}** {status})_"
                 elif ctype == "text_delta":
-                    yield chunk.get("delta", "")
                 elif ctype == "text":
-                    yield chunk.get("text", "")
                 elif ctype == "tool_result":
                     content = chunk.get("content")
-                    if isinstance(content, str) and content.strip():
-                        yield f"\n\n**Result:**\n{content}"
             else:
-                yield str(chunk)
     except Exception as e:
         msg = str(e)
-        # Common failure modes
         if "401" in msg or "Unauthorized" in msg:
-            yield (
-                "❌ Unauthorized (401). Your model call was rejected by Novita.\n\n"
-                "- Ensure the Space secret `HF_TOKEN` is set and valid.\n"
-                "- Confirm `HF_TOKEN` has access to provider='novita' and the model.\n"
-            )
-        elif "400" in msg or "Bad Request" in msg:
-            yield (
-                "❌ Bad Request (400). The Novita endpoint rejected the request.\n\n"
-                "- Double-check `CHAT_MODEL` (currently "
-                f"`{MODEL_ID}`) is valid for provider='novita'.\n"
-                "- Ensure your `HF_TOKEN` has the necessary permissions.\n"
             )
         else:
-            yield f"❌ Error: {msg}"
 # ----------------------------
 # Gradio UI
 # ----------------------------
 with gr.Blocks(fill_height=True) as demo:
     gr.Markdown(
-        "# 🤖 Gradio Docs Chat (MCP Client via Novita)\n"
-        "Ask anything about **Gradio**. Answers are grounded in the official docs via MCP."
-    )
-    chat = gr.Chatbot(
-        label="Gradio Docs Assistant",
-        height=520,
-        type="messages",   # expects: [{"role": "...", "content": "..."}]
     )
     with gr.Row():
-        msg = gr.Textbox(
-            placeholder="e.g., How do I use gr.Interface with multiple inputs?",
-            scale=9,
-            autofocus=True,
-        )
-        send_btn = gr.Button("Send", scale=1, variant="primary")
-    with gr.Row():
-        clear = gr.ClearButton(components=[chat, msg], value="Clear")
-        info = gr.Markdown(
-            f"**Model:** `{MODEL_ID}` · **Provider:** `{PROVIDER}` · **MCP:** Gradio Docs SSE",
-        )
-    async def on_submit(user_msg: str, history_msgs: List[Dict[str, Any]]):
         """
-        history_msgs is a list of {"role": ..., "content": ...} dicts.
-        We append the user's message, then stream the assistant reply by
-        updating the last assistant message content.
         """
         history_msgs = (history_msgs or []) + [{"role": "user", "content": user_msg}]
         history_msgs.append({"role": "assistant", "content": ""})
-        yield history_msgs
-        messages_for_llm = to_llm_messages(history_msgs[:-1], user_msg)
-        async for delta in stream_answer(messages_for_llm):
-            history_msgs[-1]["content"] += delta
-            yield history_msgs
-    msg.submit(on_submit, inputs=[msg, chat], outputs=chat, queue=True)
-    send_btn.click(on_submit, inputs=[msg, chat], outputs=chat, queue=True)
 if __name__ == "__main__":
     demo.launch()

 # app.py
 # Hugging Face Space: Gradio app that chats about Gradio docs via the Gradio Docs MCP server.
+# Features:
+#   • MCP tool-calling (connects to the official Gradio Docs MCP SSE server)
+#   • Streaming responses with live tool logs
+#   • Optional "Concise / Detailed" answer style
+#   • Lightweight citations panel (summarizes MCP tool hits)
+#
 # Requirements:
 #   - gradio
 #   - huggingface_hub
 #
+# Space secrets needed:
+#   - HUGGING_FACE_HUB_TOKEN or HF_TOKEN  (token must allow using the selected provider/model)
+#
+# Notes:
+#   - The default model/provider below are known to work with chat + tool calling via the HF router.
+#   - You can override via Space Secrets: CHAT_MODEL, CHAT_PROVIDER.
 import os
 import asyncio
+from typing import Any, Dict, Iterable, List, Optional, Tuple
 import gradio as gr
 from huggingface_hub import MCPClient
     "https://gradio-docs-mcp.hf.space/gradio_api/mcp/sse",
 )
+# Use a router model that supports OpenAI-style chat + tool calling.
+# You can override these with Space Secrets if you like.
+MODEL_ID = os.environ.get("CHAT_MODEL", "Qwen/Qwen2.5-7B-Instruct")
+PROVIDER = os.environ.get("CHAT_PROVIDER", "hf-inference")  # router
 HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")
+BASE_SYSTEM_PROMPT = (
+    "You are a helpful assistant that answers strictly using the Gradio documentation "
     "via the MCP tools provided by the Gradio Docs MCP server. Prefer the latest docs. "
+    "Cite relevant class/function names (e.g., gr.Interface) and include short code examples when helpful."
 )
+CONCISE_SUFFIX = " Keep answers concise (3–6 sentences) unless code is necessary."
+DETAILED_SUFFIX = " Provide a detailed, step-by-step answer with short code where helpful."
 # ----------------------------
+# MCP Client (lazy init, reconfigurable)
 # ----------------------------
 mcp_client: Optional[MCPClient] = None
 _initialized = False
 _init_lock = asyncio.Lock()
def _current_system_prompt(style: str) -> str:
    """Return the base system prompt extended with the suffix for *style*.

    ``"Concise"`` selects the concise suffix; every other value falls back
    to the detailed suffix.
    """
    suffix = CONCISE_SUFFIX if style == "Concise" else DETAILED_SUFFIX
    return BASE_SYSTEM_PROMPT + suffix
def _reset_client():
    """Forget the cached client and clear the init flag in one step."""
    global mcp_client, _initialized
    mcp_client, _initialized = None, False
def get_mcp_client(model_id: str, provider: str, api_key: Optional[str]) -> MCPClient:
    """Return the module-wide MCPClient, constructing it on first use.

    The arguments are only consulted when no client is cached yet; once a
    client exists it is returned unchanged, whatever arguments are passed.
    """
    global mcp_client
    if mcp_client is not None:
        return mcp_client
    mcp_client = MCPClient(model=model_id, provider=provider, api_key=api_key)
    return mcp_client
async def ensure_init(model_id: str, provider: str, api_key: Optional[str]):
    """Register the Gradio Docs MCP server on the shared client, once.

    The flag is checked both before and after taking the lock, so callers
    that race on first use do not attach the server twice. The flag is only
    set after the registration call has returned.
    """
    global _initialized
    if not _initialized:
        async with _init_lock:
            if not _initialized:
                docs_client = get_mcp_client(model_id, provider, api_key)
                # Awaited so the registration has finished before the flag flips.
                await docs_client.add_mcp_server(
                    type="sse",
                    url=GRADIO_DOCS_MCP_SSE,
                    timeout=45,
                )
                _initialized = True
 # ----------------------------
+# Message helpers
 # ----------------------------
+def to_llm_messages(history_msgs: List[Dict[str, Any]], user_msg: str, style: str) -> List[Dict[str, Any]]:
     """
     Convert Chatbot messages list (role/content dicts) to the LLM format,
     with a system message prepended and the new user message appended.
     """
+    msgs: List[Dict[str, Any]] = [{"role": "system", "content": _current_system_prompt(style)}]
     for m in history_msgs or []:
         role = m.get("role")
         content = m.get("content")
     msgs.append({"role": "user", "content": user_msg})
     return msgs
+# ----------------------------
+# Streaming + side-panels (tool logs & citations)
+# ----------------------------
def _append_log(log_lines: List[str], line: str, max_lines: int = 200) -> None:
    """Append *line* to *log_lines* in place, keeping only the newest *max_lines* entries."""
    log_lines.append(line)
    overflow = len(log_lines) - max_lines
    if overflow > 0:
        # Trim from the front so the oldest entries are the ones dropped.
        del log_lines[:overflow]
def _format_tool_log(log_lines: List[str]) -> str:
    """Render the tool log as newline-joined text, or a placeholder when it is empty."""
    return "\n".join(log_lines) if log_lines else "_No tool activity yet._"
def _format_citations(cites: List[Tuple[str, Optional[str]]], max_items: int = 12) -> str:
    """Render the most recent citations as a Markdown bullet list.

    Args:
        cites: ``(label, url)`` pairs in capture order. ``url`` may be ``None``
            or empty, in which case only the label is shown.
        max_items: How many of the newest citations to show (default 12).

    Returns:
        One bullet per citation, newline-joined, or a placeholder string
        when *cites* is empty.

    Raises:
        ValueError: If *max_items* is less than 1.
    """
    if max_items < 1:
        # A slice start of -0 would silently return every entry.
        raise ValueError("max_items must be at least 1")
    if not cites:
        return "_No citations captured yet._"
    # A negative slice start keeps only the tail, i.e. the newest entries.
    return "\n".join(
        f"- **{label}** — {url}" if url else f"- **{label}**"
        for label, url in cites[-max_items:]
    )
def _extract_citation(tool_name: str, content: Any) -> Tuple[str, Optional[str]]:
    """Derive a ``(label, url)`` citation from one tool-result payload."""
    import re  # function-scope import keeps this block self-contained

    if isinstance(content, dict):
        # Some servers return {"url": "...", "title": "...", ...}
        url = content.get("url") or content.get("link")
        label = content.get("title") or content.get("name") or tool_name
        return label, url
    if isinstance(content, str):
        # Light heuristic, not a strict URL parse: take the first http(s)
        # token, cap its length, and drop trailing sentence/markup punctuation.
        found = re.search(r"https?://\S+", content)
        if found:
            return tool_name, found.group(0)[:200].rstrip(".,;:!?)]}\"'")
    return tool_name, None


def _result_snippet(tool_name: str, content: Any, limit: int = 700) -> str:
    """Return a truncated "Result" chat block for non-blank string payloads, else ``""``."""
    if not isinstance(content, str):
        return ""
    body = content.strip()
    if not body:
        return ""
    if len(body) > limit:
        body = body[:limit] + "…"
    return f"\n\n**Result (from {tool_name}):**\n{body}"


async def stream_answer(
    messages_for_llm: List[Dict[str, Any]],
    model_id: str,
    provider: str,
    api_key: Optional[str],
) -> Iterable[Dict[str, Any]]:
    """
    Stream deltas and tool logs from MCPClient.process_single_turn_with_tools.

    This is an async generator (consume it with ``async for``). Every yielded
    dict has three keys so the UI can refresh three panels at once:
      - "delta":     text to append to the assistant message (may be empty)
      - "tool_log":  the formatted tool-activity log so far
      - "citations": the formatted citation list so far

    The token check runs before any client setup, and client setup runs
    inside the ``try`` so that a failure to reach the MCP server is reported
    as an error message in the chat instead of escaping the generator.
    """
    tool_log: List[str] = []
    citations: List[Tuple[str, Optional[str]]] = []  # (label, url)

    def _packet(delta: str) -> Dict[str, Any]:
        # Snapshot of all three panels at the moment of the yield.
        return {
            "delta": delta,
            "tool_log": _format_tool_log(tool_log),
            "citations": _format_citations(citations),
        }

    # Early token check: nothing below can succeed without credentials.
    if not api_key:
        yield _packet(
            "⚠️ Missing token: set `HUGGING_FACE_HUB_TOKEN` or `HF_TOKEN` "
            "in your Space **Settings → Secrets** to authenticate model calls."
        )
        return

    try:
        await ensure_init(model_id, provider, api_key)
        client = get_mcp_client(model_id, provider, api_key)

        async for chunk in client.process_single_turn_with_tools(messages_for_llm):
            if not isinstance(chunk, dict):
                # Fallback if provider yields plain strings.
                # NOTE(review): the dict shapes handled below are what this app
                # expects; confirm against the installed huggingface_hub version,
                # which may yield typed stream objects that end up here instead.
                yield _packet(str(chunk))
                continue

            ctype = chunk.get("type")
            if ctype == "tool_log":
                # Example: {"type": "tool_log", "tool": "gradio_docs_mcp_search_gradio_docs", "status": "started/finished"}
                name = chunk.get("tool", "tool")
                status = chunk.get("status", "")
                _append_log(tool_log, f"- {name} **{status}**")
                yield _packet("")
            elif ctype == "text_delta":
                yield _packet(chunk.get("delta", ""))
            elif ctype == "text":
                yield _packet(chunk.get("text", ""))
            elif ctype == "tool_result":
                tool_name = chunk.get("tool", "tool")
                content = chunk.get("content")
                citations.append(_extract_citation(tool_name, content))
                _append_log(tool_log, f"  • {tool_name} returned result")
                # Echo a short "Result:" block into the chat for transparency.
                yield _packet(_result_snippet(tool_name, content))
            # Dict chunks of any other type are ignored.
    except Exception as e:
        # UI boundary: surface the failure in the chat rather than crash the handler.
        msg = str(e)
        if "401" in msg or "Unauthorized" in msg:
            err = (
                "❌ Unauthorized (401). Ensure your token is set and permitted for the chosen model/provider.\n"
                f"- Model: `{model_id}`\n- Provider: `{provider}`\n"
            )
        else:
            err = f"❌ Error: {msg}"
        yield _packet(err)
 # ----------------------------
 # Gradio UI
 # ----------------------------
 with gr.Blocks(fill_height=True) as demo:
     gr.Markdown(
+        "# 🤖 Gradio Docs Chat (MCP Client)\n"
+        "Ask anything about **Gradio**. Answers are grounded in the official docs via MCP tools."
     )
     with gr.Row():
+        with gr.Column(scale=7):
+            chat = gr.Chatbot(
+                label="Gradio Docs Assistant",
+                height=520,
+                type="messages",   # expects: [{"role": "...", "content": "..."}]
+            )
+            with gr.Row():
+                msg = gr.Textbox(
+                    placeholder="e.g., How do I use gr.Interface with multiple inputs?",
+                    scale=9,
+                    autofocus=True,
+                )
+                send_btn = gr.Button("Send", scale=1, variant="primary")
+            clear = gr.ClearButton(components=[chat, msg], value="Clear")
+        with gr.Column(scale=5):
+            with gr.Accordion("⚙️ Settings", open=False):
+                style = gr.Radio(
+                    label="Answer Style",
+                    choices=["Concise", "Detailed"],
+                    value="Detailed",
+                )
+                model_read = gr.Markdown(
+                    f"**Model:** `{MODEL_ID}`  \n**Provider:** `{PROVIDER}`  \n"
+                    "_(override via Space Secrets: `CHAT_MODEL`, `CHAT_PROVIDER`)_"
+                )
+            with gr.Accordion("🛠 Tool Activity (live)", open=True):
+                tool_log_md = gr.Markdown("_No tool activity yet._")
+            with gr.Accordion("📎 Citations (recent)", open=True):
+                citations_md = gr.Markdown("_No citations captured yet._")
+    async def on_submit(user_msg: str, history_msgs: List[Dict[str, Any]], style_choice: str):
         """
+        Append the user's message, then stream the assistant reply while updating:
+          - chat text
+          - tool activity
+          - citations
         """
+        # Start a new assistant message for streaming
         history_msgs = (history_msgs or []) + [{"role": "user", "content": user_msg}]
         history_msgs.append({"role": "assistant", "content": ""})
+        yield history_msgs, gr.update(value="_No tool activity yet._"), gr.update(value="_No citations captured yet._")
+        # Compose messages for LLM
+        messages_for_llm = to_llm_messages(history_msgs[:-1], user_msg, style_choice)
+        async for chunk in stream_answer(messages_for_llm, MODEL_ID, PROVIDER, HF_TOKEN):
+            delta = chunk.get("delta", "")
+            if delta:
+                history_msgs[-1]["content"] += delta
+            yield history_msgs, gr.update(value=chunk.get("tool_log", "")), gr.update(value=chunk.get("citations", ""))
+    # Wire both Enter and button click; also pass "style"
+    msg.submit(on_submit, inputs=[msg, chat, style], outputs=[chat, tool_log_md, citations_md], queue=True)
+    send_btn.click(on_submit, inputs=[msg, chat, style], outputs=[chat, tool_log_md, citations_md], queue=True)
+# Start the app when this file is executed as a script (e.g. `python app.py`).
 if __name__ == "__main__":
     demo.launch()