Update app.py
app.py
CHANGED
@@ -5,7 +5,7 @@
 # - huggingface_hub
 #
 # Space secret needed:
-# - HUGGING_FACE_HUB_TOKEN
+# - HUGGING_FACE_HUB_TOKEN or HF_TOKEN (must have access to provider="novita")
 
 import os
 import asyncio
@@ -22,10 +22,11 @@ GRADIO_DOCS_MCP_SSE = os.environ.get(
     "https://gradio-docs-mcp.hf.space/gradio_api/mcp/sse",
 )
 
-#
+# Use Novita provider + the model you specified
 MODEL_ID = os.environ.get("CHAT_MODEL", "openai/gpt-oss-20b")
-PROVIDER = os.environ.get("CHAT_PROVIDER", "
-
+PROVIDER = os.environ.get("CHAT_PROVIDER", "novita")  # <-- IMPORTANT
+# Accept either env name
+HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")
 
 SYSTEM_PROMPT = (
     "You are a helpful assistant that answers questions strictly using the Gradio documentation "
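For reference, the `PROVIDER` / `MODEL_ID` / `HF_TOKEN` trio above corresponds to a direct `huggingface_hub` call like the following minimal sketch (not part of this commit; it assumes the token is allowed to use the Novita inference provider):

```python
# Sketch: the same model/provider/token used directly, without MCP.
# Assumes HF_TOKEN is exported and allowed to use provider="novita".
import os
from huggingface_hub import InferenceClient

client = InferenceClient(
    provider="novita",                  # same value as PROVIDER above
    api_key=os.environ["HF_TOKEN"],     # same token the Space reads
)
resp = client.chat_completion(
    model="openai/gpt-oss-20b",         # same default as MODEL_ID
    messages=[{"role": "user", "content": "Say hello"}],
    max_tokens=32,
)
print(resp.choices[0].message.content)
```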
@@ -42,10 +43,18 @@ _init_lock = asyncio.Lock()
 
 
 def get_mcp_client() -> MCPClient:
+    """
+    Create a single global MCPClient configured to use provider='novita'
+    and the given MODEL_ID. MCPClient internally uses huggingface_hub's
+    InferenceClient, so this matches your direct-Novita snippet.
+    """
     global mcp_client
     if mcp_client is None:
-
-
+        mcp_client = MCPClient(
+            model=MODEL_ID,
+            provider=PROVIDER,  # novita
+            api_key=HF_TOKEN,   # token must permit novita access
+        )
     return mcp_client
 
 
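One caveat worth noting: `get_mcp_client()` is a check-then-create, so two coroutines racing through it before the first construction finishes could in principle build two clients. A sketch of a lock-guarded variant, reusing the `_init_lock` from the hunk header and app.py's module globals:

```python
# Sketch: race-free lazy construction using the module-level _init_lock.
# Relies on app.py's globals (mcp_client, MODEL_ID, PROVIDER, HF_TOKEN).
async def get_mcp_client_async() -> MCPClient:
    global mcp_client
    if mcp_client is None:
        async with _init_lock:
            if mcp_client is None:  # re-check after acquiring the lock
                mcp_client = MCPClient(
                    model=MODEL_ID,
                    provider=PROVIDER,
                    api_key=HF_TOKEN,
                )
    return mcp_client
```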
@@ -62,12 +71,11 @@ async def ensure_init():
     if _initialized:
         return
     client = get_mcp_client()
-    # Await the coroutine to avoid "was never awaited"
+    # Await the coroutine to avoid "was never awaited" warnings
     await client.add_mcp_server(
         type="sse",
         url=GRADIO_DOCS_MCP_SSE,
         timeout=30,
-        # headers={"Authorization": f"Bearer {HF_TOKEN}"}  # Not needed for public server
     )
     _initialized = True
 
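The connection set up here can be smoke-tested outside the Space with a short standalone script. This sketch uses only the calls that appear in this diff and assumes `HF_TOKEN` is exported locally:

```python
# Sketch: standalone smoke test for the MCP connection and one model turn.
# Uses only calls that appear in app.py; assumes HF_TOKEN is exported.
import asyncio
import os

from huggingface_hub import MCPClient

async def main() -> None:
    client = MCPClient(
        model="openai/gpt-oss-20b",
        provider="novita",
        api_key=os.environ["HF_TOKEN"],
    )
    await client.add_mcp_server(
        type="sse",
        url="https://gradio-docs-mcp.hf.space/gradio_api/mcp/sse",
        timeout=30,
    )
    async for chunk in client.process_single_turn_with_tools(
        [{"role": "user", "content": "What does gr.Interface do?"}]
    ):
        print(chunk)

asyncio.run(main())
```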
@@ -81,8 +89,7 @@ def to_llm_messages(history_msgs: List[Dict[str, Any]], user_msg: str) -> List[Dict[str, Any]]:
     with a system message prepended and the new user message appended.
     """
     msgs: List[Dict[str, Any]] = [{"role": "system", "content": SYSTEM_PROMPT}]
-
-    for m in history_msgs:
+    for m in history_msgs or []:
         role = m.get("role")
         content = m.get("content")
         if role in ("user", "assistant") and isinstance(content, str):
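Concretely, given the docstring, the shaping should behave as follows (hypothetical history values; the tail of the function that appends `user_msg` sits outside this hunk):

```python
# Sketch of the expected shaping, per the docstring: system prompt first,
# prior user/assistant turns kept, the new user message appended last.
history = [
    {"role": "user", "content": "What is gr.Blocks?"},
    {"role": "assistant", "content": "gr.Blocks is a low-level API..."},
]
assert to_llm_messages(history, "How do I stream?") == [
    {"role": "system", "content": SYSTEM_PROMPT},
    {"role": "user", "content": "What is gr.Blocks?"},
    {"role": "assistant", "content": "gr.Blocks is a low-level API..."},
    {"role": "user", "content": "How do I stream?"},
]
```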
@@ -98,17 +105,16 @@ async def stream_answer(messages_for_llm: List[Dict[str, Any]]) -> Iterable[str]
     await ensure_init()
     client = get_mcp_client()
 
-    #
+    # Pre-flight checks
     if not HF_TOKEN:
         yield (
-            "⚠️ Missing token: set `
-            "in your Space **Settings → Secrets
+            "⚠️ Missing token: set `HF_TOKEN` (or `HUGGING_FACE_HUB_TOKEN`) "
+            "in your Space **Settings → Secrets**. The token must allow provider='novita'."
         )
         return
 
     try:
         async for chunk in client.process_single_turn_with_tools(messages_for_llm):
-            # chunk is a dict describing text deltas and tool activity
             if isinstance(chunk, dict):
                 ctype = chunk.get("type")
                 if ctype == "tool_log":
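The per-request token check above surfaces the problem inside the chat UI, which is friendly on Spaces. A stricter fail-fast alternative (not in this commit) would validate once at startup:

```python
# Sketch: fail fast at import time instead of per-request (alternative design).
if not HF_TOKEN:
    raise RuntimeError(
        "Set HF_TOKEN (or HUGGING_FACE_HUB_TOKEN) in Space Settings → Secrets; "
        "the token must allow provider='novita'."
    )
```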
@@ -124,16 +130,22 @@ async def stream_answer(messages_for_llm: List[Dict[str, Any]]) -> Iterable[str]
                 if isinstance(content, str) and content.strip():
                     yield f"\n\n**Result:**\n{content}"
             else:
-                # Fallback if provider yields plain strings
                 yield str(chunk)
     except Exception as e:
         msg = str(e)
+        # Common failure modes
         if "401" in msg or "Unauthorized" in msg:
             yield (
-                "❌ Unauthorized (401). Your model call was rejected.\n\n"
-                "- Ensure the Space secret `
-                "- Confirm
-
+                "❌ Unauthorized (401). Your model call was rejected by Novita.\n\n"
+                "- Ensure the Space secret `HF_TOKEN` is set and valid.\n"
+                "- Confirm `HF_TOKEN` has access to provider='novita' and the model.\n"
+            )
+        elif "400" in msg or "Bad Request" in msg:
+            yield (
+                "❌ Bad Request (400). The Novita endpoint rejected the request.\n\n"
+                "- Double-check `CHAT_MODEL` (currently "
+                f"`{MODEL_ID}`) is valid for provider='novita'.\n"
+                "- Ensure your `HF_TOKEN` has the necessary permissions.\n"
             )
         else:
             yield f"❌ Error: {msg}"
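Substring-matching `"401"` and `"400"` in the exception text works but is brittle. `huggingface_hub` defines `HfHubHTTPError`, which carries the HTTP response, so the status code can be branched on directly; a sketch of that variant, assuming the failing provider call surfaces that exception type:

```python
# Sketch: branch on the HTTP status code instead of exception substrings.
# Assumes the failing call raises huggingface_hub's HfHubHTTPError.
from huggingface_hub.errors import HfHubHTTPError

try:
    ...  # the process_single_turn_with_tools(...) loop from above
except HfHubHTTPError as e:
    status = e.response.status_code if e.response is not None else None
    if status == 401:
        print("Unauthorized: check HF_TOKEN")  # or yield a chat message
    elif status == 400:
        print(f"Bad request: check CHAT_MODEL {MODEL_ID!r}")
    else:
        raise
```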
@@ -144,15 +156,14 @@ async def stream_answer(messages_for_llm: List[Dict[str, Any]]) -> Iterable[str]
 # ----------------------------
 with gr.Blocks(fill_height=True) as demo:
     gr.Markdown(
-        "# 🤖 Gradio Docs Chat (MCP Client)\n"
+        "# 🤖 Gradio Docs Chat (MCP Client via Novita)\n"
         "Ask anything about **Gradio**. Answers are grounded in the official docs via MCP."
     )
 
-    # Using the new messages format
     chat = gr.Chatbot(
         label="Gradio Docs Assistant",
         height=520,
-        type="messages",  # expects
+        type="messages",  # expects: [{"role": "...", "content": "..."}]
     )
 
     with gr.Row():
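With `type="messages"`, the Chatbot renders the same role/content dicts that the LLM side consumes, so no tuple conversion is needed anywhere. For example, a two-bubble history is simply:

```python
# The history shape gr.Chatbot(type="messages") renders directly:
history = [
    {"role": "user", "content": "How do I add streaming?"},
    {"role": "assistant", "content": "Yield partial updates from your handler."},
]
```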
@@ -173,24 +184,19 @@ with gr.Blocks(fill_height=True) as demo:
         """
         history_msgs is a list of {"role": ..., "content": ...} dicts.
         We append the user's message, then stream the assistant reply by
-        updating
+        updating the last assistant message content.
         """
         history_msgs = (history_msgs or []) + [{"role": "user", "content": user_msg}]
-        # Add a placeholder assistant message to stream into
         history_msgs.append({"role": "assistant", "content": ""})
         yield history_msgs
 
-        # Build LLM messages and stream chunks
         messages_for_llm = to_llm_messages(history_msgs[:-1], user_msg)
         async for delta in stream_answer(messages_for_llm):
-            # Append the delta to the last assistant message
            history_msgs[-1]["content"] += delta
             yield history_msgs
 
-    # Wire both Enter and button click
     msg.submit(on_submit, inputs=[msg, chat], outputs=chat, queue=True)
     send_btn.click(on_submit, inputs=[msg, chat], outputs=chat, queue=True)
 
-# For local dev; on Spaces, Gradio calls launch automatically.
 if __name__ == "__main__":
     demo.launch()
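A common follow-up to this wiring is clearing the textbox after each send; Gradio event listeners chain with `.then(...)`. A sketch of that variant (not part of this commit):

```python
# Sketch: chain a second step that clears the textbox after each submit.
msg.submit(on_submit, inputs=[msg, chat], outputs=chat, queue=True).then(
    lambda: "", inputs=None, outputs=msg
)
send_btn.click(on_submit, inputs=[msg, chat], outputs=chat, queue=True).then(
    lambda: "", inputs=None, outputs=msg
)
```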