Update app.py
app.py CHANGED
```diff
@@ -51,7 +51,7 @@ GRADIO_DOCS_MCP_SSE = os.environ.get(
 )
 
 # Use a router model that supports OpenAI-style chat + tool calling.
-MODEL_ID = os.environ.get("CHAT_MODEL", "
+MODEL_ID = os.environ.get("CHAT_MODEL", "meta-llama/Meta-Llama-3.1-8B-Instruct")
 PROVIDER = os.environ.get("CHAT_PROVIDER", "hf-inference")  # router
 HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")
 
```
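For context, the three values configured here are the usual inputs to `huggingface_hub.InferenceClient`. A minimal sketch of how they would typically be consumed; the `make_client` helper is hypothetical and not part of this diff, and the `provider=` argument requires a recent `huggingface_hub` release:

```python
import os

from huggingface_hub import InferenceClient

MODEL_ID = os.environ.get("CHAT_MODEL", "meta-llama/Meta-Llama-3.1-8B-Instruct")
PROVIDER = os.environ.get("CHAT_PROVIDER", "hf-inference")  # router
HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")


def make_client() -> InferenceClient:
    # Hypothetical helper; the app may construct its client elsewhere.
    return InferenceClient(model=MODEL_ID, provider=PROVIDER, token=HF_TOKEN)
```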
```diff
@@ -186,6 +186,7 @@ async def stream_answer(
         ctype = chunk.get("type")
 
         if ctype == "tool_log":
+            # Example: {"type": "tool_log", "tool": "...", "status": "started/finished"}
             name = chunk.get("tool", "tool")
             status = chunk.get("status", "")
             _append_log(tool_log, f"- {name} **{status}**")
```
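This branch assumes a small chunk protocol: the backend behind `stream_answer` yields dicts with a `type` field, and `tool_log` chunks carry `tool` and `status`. The `_append_log` and `_format_tool_log` helpers are referenced but not shown in this diff; a plausible sketch, assuming `tool_log` is a plain list of Markdown lines:

```python
def _append_log(tool_log: list[str], line: str) -> None:
    # Hypothetical reconstruction: collect one Markdown bullet per tool event.
    tool_log.append(line)


def _format_tool_log(tool_log: list[str]) -> str:
    # Hypothetical reconstruction: render the accumulated log for the UI panel,
    # matching the placeholder text used by the chat handler further down.
    return "\n".join(tool_log) if tool_log else "_No tool activity yet._"
```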
```diff
@@ -229,6 +230,7 @@ async def stream_answer(
             }
 
         else:
+            # Fallback if provider yields plain strings
             yield {"delta": str(chunk), "tool_log": _format_tool_log(tool_log), "citations": _format_citations(citations)}
 
     except Exception as e:
```
```diff
@@ -238,6 +240,11 @@ async def stream_answer(
                 "❌ Unauthorized (401). Ensure your token is set and permitted for the chosen model/provider.\n"
                 f"- Model: `{model_id}`\n- Provider: `{provider}`\n"
             )
+        elif "404" in msg or "Not Found" in msg:
+            err = (
+                "❌ Model not found (404). The default model may not be available via hf-inference.\n"
+                "Consider setting `CHAT_MODEL` in your Space settings to a model that supports chat via the HF router."
+            )
         else:
             err = f"❌ Error: {msg}"
         yield {"delta": err, "tool_log": _format_tool_log(tool_log), "citations": _format_citations(citations)}
```
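Note that the new 404 branch, like the existing 401 branch whose condition sits just above this hunk, classifies errors by substring-matching the stringified exception rather than inspecting a status code. The same logic factored into a helper, for illustration only; the name, and the exact 401 test, are assumptions:

```python
def classify_error(msg: str) -> str:
    # Illustrative only: mirrors the diff's substring matching.
    if "401" in msg or "Unauthorized" in msg:
        return "unauthorized"
    if "404" in msg or "Not Found" in msg:
        return "model_not_found"
    return "other"
```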
```diff
@@ -293,10 +300,12 @@ with gr.Blocks(fill_height=True) as demo:
         - tool activity
         - citations
         """
+        # Start a new assistant message for streaming
         history_msgs = (history_msgs or []) + [{"role": "user", "content": user_msg}]
         history_msgs.append({"role": "assistant", "content": ""})
         yield history_msgs, gr.update(value="_No tool activity yet._"), gr.update(value="_No citations captured yet._")
 
+        # Compose messages for LLM
         messages_for_llm = to_llm_messages(history_msgs[:-1], user_msg, style_choice)
 
         async for chunk in stream_answer(messages_for_llm, MODEL_ID, PROVIDER, HF_TOKEN):
```
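This handler is an async generator: each `yield` pushes a fresh triple of chatbot history, tool-activity panel, and citations panel to the UI, which is how Gradio streams partial output into several components at once. The event wiring is outside this hunk; a minimal sketch of how such a handler is typically attached, where the handler and component names are all assumptions:

```python
# Hypothetical wiring; the real names are not shown in the diff.
msg_box.submit(
    respond,  # the async generator sketched in the hunk above
    inputs=[msg_box, chatbot, style_dropdown],
    outputs=[chatbot, tool_panel, citations_panel],
)
```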
```diff
@@ -312,10 +321,10 @@ with gr.Blocks(fill_height=True) as demo:
 # ----------------------------
 # Gradio runtime (queue + launch)
 # ----------------------------
-#
+# IMPORTANT: assign the queued app back to 'demo' for older Gradio versions.
 demo = demo.queue(max_size=32)
 
-# Always launch
+# Always launch; Spaces runs this script directly. Banner lines about "local URL" are normal.
 demo.launch(
-    ssr_mode=False
+    ssr_mode=False
 )
```
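As the new comment says, `Blocks.queue()` returns the queued app, so the reassignment keeps older Gradio versions working; on current Gradio the chained form below is equivalent:

```python
# Equivalent chained form: queue() returns the Blocks instance.
demo.queue(max_size=32).launch(ssr_mode=False)
```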
|