Update app.py
app.py CHANGED
```diff
@@ -51,7 +51,7 @@ GRADIO_DOCS_MCP_SSE = os.environ.get(
 )
 
 # Use a router model that supports OpenAI-style chat + tool calling.
-MODEL_ID = os.environ.get("CHAT_MODEL", "
+MODEL_ID = os.environ.get("CHAT_MODEL", "meta-llama/Meta-Llama-3.1-8B-Instruct")
 PROVIDER = os.environ.get("CHAT_PROVIDER", "hf-inference")  # router
 HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")
 
```
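For context, the three values configured here are the usual inputs to `huggingface_hub.InferenceClient`. A minimal sketch of how they would typically be consumed; the `make_client` helper is hypothetical and not part of this diff, and the `provider=` argument requires a recent `huggingface_hub` release:

```python
import os

from huggingface_hub import InferenceClient

MODEL_ID = os.environ.get("CHAT_MODEL", "meta-llama/Meta-Llama-3.1-8B-Instruct")
PROVIDER = os.environ.get("CHAT_PROVIDER", "hf-inference")  # router
HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")


def make_client() -> InferenceClient:
    # Hypothetical helper; the app may construct its client elsewhere.
    return InferenceClient(model=MODEL_ID, provider=PROVIDER, token=HF_TOKEN)
```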
```diff
@@ -186,6 +186,7 @@ async def stream_answer(
         ctype = chunk.get("type")
 
         if ctype == "tool_log":
+            # Example: {"type": "tool_log", "tool": "...", "status": "started/finished"}
             name = chunk.get("tool", "tool")
             status = chunk.get("status", "")
             _append_log(tool_log, f"- {name} **{status}**")
```
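This branch assumes a small chunk protocol: the backend behind `stream_answer` yields dicts with a `type` field, and `tool_log` chunks carry `tool` and `status`. The `_append_log` and `_format_tool_log` helpers are referenced but not shown in this diff; a plausible sketch, assuming `tool_log` is a plain list of Markdown lines:

```python
def _append_log(tool_log: list[str], line: str) -> None:
    # Hypothetical reconstruction: collect one Markdown bullet per tool event.
    tool_log.append(line)


def _format_tool_log(tool_log: list[str]) -> str:
    # Hypothetical reconstruction: render the accumulated log for the UI panel,
    # matching the placeholder text used by the chat handler further down.
    return "\n".join(tool_log) if tool_log else "_No tool activity yet._"
```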
```diff
@@ -229,6 +230,7 @@ async def stream_answer(
             }
 
         else:
+            # Fallback if provider yields plain strings
             yield {"delta": str(chunk), "tool_log": _format_tool_log(tool_log), "citations": _format_citations(citations)}
 
     except Exception as e:
```
```diff
@@ -238,6 +240,11 @@ async def stream_answer(
                 "❌ Unauthorized (401). Ensure your token is set and permitted for the chosen model/provider.\n"
                 f"- Model: `{model_id}`\n- Provider: `{provider}`\n"
             )
+        elif "404" in msg or "Not Found" in msg:
+            err = (
+                "❌ Model not found (404). The default model may not be available via hf-inference.\n"
+                "Consider setting `CHAT_MODEL` in your Space settings to a model that supports chat via the HF router."
+            )
         else:
             err = f"❌ Error: {msg}"
         yield {"delta": err, "tool_log": _format_tool_log(tool_log), "citations": _format_citations(citations)}
```
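Note that the new 404 branch, like the existing 401 branch whose condition sits just above this hunk, classifies errors by substring-matching the stringified exception rather than inspecting a status code. The same logic factored into a helper, for illustration only; the name, and the exact 401 test, are assumptions:

```python
def classify_error(msg: str) -> str:
    # Illustrative only: mirrors the diff's substring matching.
    if "401" in msg or "Unauthorized" in msg:
        return "unauthorized"
    if "404" in msg or "Not Found" in msg:
        return "model_not_found"
    return "other"
```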
```diff
@@ -293,10 +300,12 @@ with gr.Blocks(fill_height=True) as demo:
         - tool activity
         - citations
         """
+        # Start a new assistant message for streaming
         history_msgs = (history_msgs or []) + [{"role": "user", "content": user_msg}]
         history_msgs.append({"role": "assistant", "content": ""})
         yield history_msgs, gr.update(value="_No tool activity yet._"), gr.update(value="_No citations captured yet._")
 
+        # Compose messages for LLM
         messages_for_llm = to_llm_messages(history_msgs[:-1], user_msg, style_choice)
 
         async for chunk in stream_answer(messages_for_llm, MODEL_ID, PROVIDER, HF_TOKEN):
```
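This handler is an async generator: each `yield` pushes a fresh triple of chatbot history, tool-activity panel, and citations panel to the UI, which is how Gradio streams partial output into several components at once. The event wiring is outside this hunk; a minimal sketch of how such a handler is typically attached, where the handler and component names are all assumptions:

```python
# Hypothetical wiring; the real names are not shown in the diff.
msg_box.submit(
    respond,  # the async generator sketched in the hunk above
    inputs=[msg_box, chatbot, style_dropdown],
    outputs=[chatbot, tool_panel, citations_panel],
)
```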
```diff
@@ -312,10 +321,10 @@ with gr.Blocks(fill_height=True) as demo:
 # ----------------------------
 # Gradio runtime (queue + launch)
 # ----------------------------
-#
+# IMPORTANT: assign the queued app back to 'demo' for older Gradio versions.
 demo = demo.queue(max_size=32)
 
-# Always launch
+# Always launch; Spaces runs this script directly. Banner lines about "local URL" are normal.
 demo.launch(
-    ssr_mode=False
+    ssr_mode=False
 )
```
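As the new comment says, `Blocks.queue()` returns the queued app, so the reassignment keeps older Gradio versions working; on current Gradio the chained form below is equivalent:

```python
# Equivalent chained form: queue() returns the Blocks instance.
demo.queue(max_size=32).launch(ssr_mode=False)
```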
|