Update app.py
app.py
CHANGED
@@ -9,6 +9,7 @@
 # Requirements (in requirements.txt):
 # gradio>=4.44.0
 # huggingface_hub>=0.24.0
+# spaces  # only needed if your Space hardware is ZeroGPU
 #
 # Space secrets needed:
 # - HUGGING_FACE_HUB_TOKEN or HF_TOKEN (token must allow using the selected provider/model)
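For reference, the requirements.txt these header comments describe would now read as follows (a sketch assembled from the comments themselves; this commit does not touch the file):

```text
gradio>=4.44.0
huggingface_hub>=0.24.0
# spaces  # uncomment only if the Space runs on ZeroGPU hardware
```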
@@ -25,6 +26,22 @@ from typing import Any, Dict, Iterable, List, Optional, Tuple
 import gradio as gr
 from huggingface_hub import MCPClient
 
+# -------------------------------------------------
+# Optional ZeroGPU shim (silences "No @spaces.GPU..." startup message)
+# -------------------------------------------------
+SPACES_ZERO_GPU = bool(os.environ.get("SPACE_ZERO_GPU", ""))  # HF sets this in ZeroGPU envs
+try:
+    import spaces  # type: ignore
+except Exception:
+    spaces = None  # not installed on CPU Spaces; harmless
+
+if spaces is not None:
+    @spaces.GPU  # this just lets ZeroGPU detect a GPU-capable function at startup
+    def _zero_gpu_probe():
+        # We don't actually need a GPU for this app (remote inference),
+        # but the decorated function's presence satisfies ZeroGPU's startup check.
+        return "ok"
+
 # ----------------------------
 # Configuration
 # ----------------------------
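Two notes on the shim: it assumes `os` is already imported higher up in app.py, and the new `SPACES_ZERO_GPU` flag is computed but never consulted, since the code keys only off whether `import spaces` succeeds. If you wanted the probe defined only on actual ZeroGPU hardware, a variant could gate on the flag as well (a sketch, not part of this commit):

```python
# Variant sketch: define the probe only when both the `spaces` package is
# importable AND the ZeroGPU env flag is set. Assumes, as the commit's own
# comment says, that HF sets SPACE_ZERO_GPU in ZeroGPU environments.
import os

spaces = None
if os.environ.get("SPACE_ZERO_GPU"):
    try:
        import spaces  # type: ignore
    except Exception:
        spaces = None

if spaces is not None:
    @spaces.GPU  # a decorated function is all ZeroGPU's startup check looks for
    def _zero_gpu_probe():
        return "ok"
```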
@@ -126,8 +143,7 @@ def _format_tool_log(log_lines: List[str]) -> str:
 def _format_citations(cites: List[Tuple[str, Optional[str]]]) -> str:
     if not cites:
         return "_No citations captured yet._"
-
-    last = cites[-12:]
+    last = cites[-12:]  # show recent
     lines = []
     for (label, url) in last:
         if url:
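For context, the function being patched plausibly renders each `(label, url)` pair as a Markdown bullet. A sketch of the full body consistent with the visible lines; everything past `if url:` is an assumption:

```python
from typing import List, Optional, Tuple

def _format_citations(cites: List[Tuple[str, Optional[str]]]) -> str:
    if not cites:
        return "_No citations captured yet._"
    last = cites[-12:]  # show recent
    lines = []
    for (label, url) in last:
        if url:
            lines.append(f"- [{label}]({url})")  # assumption: Markdown link
        else:
            lines.append(f"- {label}")
    return "\n".join(lines)
```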
@@ -170,7 +186,6 @@ async def stream_answer(
             ctype = chunk.get("type")
 
             if ctype == "tool_log":
-                # Example: {"type": "tool_log", "tool": "...", "status": "started/finished"}
                 name = chunk.get("tool", "tool")
                 status = chunk.get("status", "")
                 _append_log(tool_log, f"- {name} **{status}**")
@@ -183,7 +198,7 @@
                 yield {"delta": chunk.get("text", ""), "tool_log": _format_tool_log(tool_log), "citations": _format_citations(citations)}
 
             elif ctype == "tool_result":
-                #
+                # Capture a useful citation label and optional URL if present
                 tool_name = chunk.get("tool", "tool")
                 content = chunk.get("content")
                 url = None
@@ -214,7 +229,6 @@ async def stream_answer(
                 }
 
             else:
-                # Fallback if provider yields plain strings
                 yield {"delta": str(chunk), "tool_log": _format_tool_log(tool_log), "citations": _format_citations(citations)}
 
     except Exception as e:
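Taken together, the branches above imply that `stream_answer` consumes dict chunks of roughly these shapes (reconstructed from the handling code; the field values and the "text" type name are assumptions):

```python
# Chunk shapes the dispatch above appears to handle. The tool name and
# content fields are illustrative placeholders, not taken from the diff.
tool_log_chunk = {"type": "tool_log", "tool": "web_search", "status": "started"}
text_chunk = {"type": "text", "text": "partial answer..."}
tool_result_chunk = {
    "type": "tool_result",
    "tool": "web_search",
    "content": {"url": "https://example.com"},  # the handler mines this for a citation URL
}
# Any other chunk falls through to the final `else:` and is stringified into a delta.
```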
@@ -279,12 +293,10 @@ with gr.Blocks(fill_height=True) as demo:
         - tool activity
         - citations
         """
-        # Start a new assistant message for streaming
         history_msgs = (history_msgs or []) + [{"role": "user", "content": user_msg}]
         history_msgs.append({"role": "assistant", "content": ""})
         yield history_msgs, gr.update(value="_No tool activity yet._"), gr.update(value="_No citations captured yet._")
 
-        # Compose messages for LLM
         messages_for_llm = to_llm_messages(history_msgs[:-1], user_msg, style_choice)
 
         async for chunk in stream_answer(messages_for_llm, MODEL_ID, PROVIDER, HF_TOKEN):
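Because the handler yields a `(history, tool-log update, citations update)` triple on every chunk, it presumably drives three output components. A self-contained sketch of that wiring with dummy components; none of these names appear in the diff:

```python
# Hypothetical hookup; respond, msg_box, chatbot, style_dropdown, tools_md
# and cites_md are placeholder names, not from this commit.
import gradio as gr

async def respond(user_msg, history_msgs, style_choice):
    # Placeholder generator mirroring the real handler's 3-tuple yields.
    history_msgs = (history_msgs or []) + [{"role": "user", "content": user_msg}]
    history_msgs.append({"role": "assistant", "content": "..."})
    yield history_msgs, gr.update(value="_No tool activity yet._"), gr.update(value="_No citations captured yet._")

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(type="messages")  # role/content dicts, as the handler builds
    tools_md = gr.Markdown("_No tool activity yet._")
    cites_md = gr.Markdown("_No citations captured yet._")
    style_dropdown = gr.Dropdown(["concise", "detailed"], value="concise", label="Style")
    msg_box = gr.Textbox(label="Message")
    msg_box.submit(respond, [msg_box, chatbot, style_dropdown], [chatbot, tools_md, cites_md])
```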
@@ -300,10 +312,10 @@ with gr.Blocks(fill_height=True) as demo:
 # ----------------------------
 # Gradio runtime (queue + launch)
 # ----------------------------
-#
+# Assign back for older Gradio versions.
 demo = demo.queue(max_size=32)
 
-# Always launch
+# Always launch on Spaces (banner lines about "local URL" are normal).
 demo.launch(
-    ssr_mode=False
+    ssr_mode=False  # if SSR gives you trouble; otherwise you can omit this
 )
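One compatibility note on the launch block: `ssr_mode` appears to be a Gradio 5 launch flag, while the header pins only `gradio>=4.44.0`, so an older 4.x environment could reject the argument. A defensive sketch, if you want to keep the loose pin:

```python
# Sketch: pass ssr_mode only when the installed Gradio supports it.
import inspect

import gradio as gr

with gr.Blocks() as demo:
    gr.Markdown("placeholder")  # stands in for the real UI

launch_kwargs = {}
if "ssr_mode" in inspect.signature(gr.Blocks.launch).parameters:
    launch_kwargs["ssr_mode"] = False  # skip SSR; it has caused issues on some Spaces

demo.queue(max_size=32)
demo.launch(**launch_kwargs)
```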
|