sudipta26889 committed
Commit b7bf9b1 · verified · 1 Parent(s): fdf4fd8

Update app.py

Files changed (1)
  1. app.py +13 -4
app.py CHANGED
@@ -51,7 +51,7 @@ GRADIO_DOCS_MCP_SSE = os.environ.get(
 )
 
 # Use a router model that supports OpenAI-style chat + tool calling.
-MODEL_ID = os.environ.get("CHAT_MODEL", "Qwen/Qwen2.5-7B-Instruct")
+MODEL_ID = os.environ.get("CHAT_MODEL", "meta-llama/Meta-Llama-3.1-8B-Instruct")
 PROVIDER = os.environ.get("CHAT_PROVIDER", "hf-inference")  # router
 HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")
 
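The client setup that consumes these variables is not part of this diff. As a minimal sketch, assuming the app uses `huggingface_hub`'s `InferenceClient` (the exact wiring in app.py may differ):

```python
import os

from huggingface_hub import InferenceClient

# Same env-var scheme as the diff above; the defaults are illustrative.
MODEL_ID = os.environ.get("CHAT_MODEL", "meta-llama/Meta-Llama-3.1-8B-Instruct")
PROVIDER = os.environ.get("CHAT_PROVIDER", "hf-inference")
HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")

# provider= requires a recent huggingface_hub release.
client = InferenceClient(model=MODEL_ID, provider=PROVIDER, token=HF_TOKEN)

# OpenAI-style chat call; stream=True yields incremental deltas.
for event in client.chat_completion(
    messages=[{"role": "user", "content": "Say hi"}],
    stream=True,
):
    print(event.choices[0].delta.content or "", end="")
```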
@@ -186,6 +186,7 @@ async def stream_answer(
         ctype = chunk.get("type")
 
         if ctype == "tool_log":
+            # Example: {"type": "tool_log", "tool": "...", "status": "started/finished"}
             name = chunk.get("tool", "tool")
             status = chunk.get("status", "")
             _append_log(tool_log, f"- {name} **{status}**")
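`_append_log` and `_format_tool_log` are defined elsewhere in app.py and are not shown in this diff. A plausible sketch of their contract (only the names come from the diff; the bodies are assumptions):

```python
def _append_log(tool_log: list, line: str) -> None:
    # One markdown bullet per tool event, e.g. "- search_docs **started**".
    tool_log.append(line)


def _format_tool_log(tool_log: list) -> str:
    # Render the accumulated events, or the placeholder the UI starts with.
    return "\n".join(tool_log) if tool_log else "_No tool activity yet._"
```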
@@ -229,6 +230,7 @@ async def stream_answer(
             }
 
         else:
+            # Fallback if provider yields plain strings
             yield {"delta": str(chunk), "tool_log": _format_tool_log(tool_log), "citations": _format_citations(citations)}
 
     except Exception as e:
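For context, these are the two chunk shapes the loop distinguishes (values invented for illustration):

```python
# A structured event, dispatched on chunk["type"]:
chunk_a = {"type": "tool_log", "tool": "search_docs", "status": "started"}
# A bare string from a provider that streams raw text; falls through to str(chunk):
chunk_b = "partial answer text"
```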
@@ -238,6 +240,11 @@ async def stream_answer(
             "❌ Unauthorized (401). Ensure your token is set and permitted for the chosen model/provider.\n"
             f"- Model: `{model_id}`\n- Provider: `{provider}`\n"
         )
+        elif "404" in msg or "Not Found" in msg:
+            err = (
+                "❌ Model not found (404). The default model may not be available via hf-inference.\n"
+                "Consider setting `CHAT_MODEL` in your Space settings to a model that supports chat via the HF router."
+            )
         else:
             err = f"❌ Error: {msg}"
         yield {"delta": err, "tool_log": _format_tool_log(tool_log), "citations": _format_citations(citations)}
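The branching keys on the exception text; the 401 condition itself sits above this hunk and is not visible, so the sketch below assumes a substring check. If you want to test the mapping in isolation, the same logic factors out into a pure helper (`classify_error` is hypothetical, not part of app.py):

```python
def classify_error(msg: str, model_id: str, provider: str) -> str:
    # Mirror of the branching in the diff, as a testable pure function.
    if "401" in msg or "Unauthorized" in msg:
        return (
            "❌ Unauthorized (401). Ensure your token is set and permitted "
            f"for the chosen model/provider.\n- Model: `{model_id}`\n- Provider: `{provider}`\n"
        )
    if "404" in msg or "Not Found" in msg:
        return (
            "❌ Model not found (404). The default model may not be available via hf-inference.\n"
            "Consider setting `CHAT_MODEL` in your Space settings."
        )
    return f"❌ Error: {msg}"


assert classify_error("404 Client Error: Not Found", "m", "p").startswith("❌ Model not found")
```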
@@ -293,10 +300,12 @@ with gr.Blocks(fill_height=True) as demo:
         - tool activity
         - citations
         """
+        # Start a new assistant message for streaming
         history_msgs = (history_msgs or []) + [{"role": "user", "content": user_msg}]
         history_msgs.append({"role": "assistant", "content": ""})
         yield history_msgs, gr.update(value="_No tool activity yet._"), gr.update(value="_No citations captured yet._")
 
+        # Compose messages for LLM
         messages_for_llm = to_llm_messages(history_msgs[:-1], user_msg, style_choice)
 
         async for chunk in stream_answer(messages_for_llm, MODEL_ID, PROVIDER, HF_TOKEN):
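The hunk cuts off at the `async for` line, so the loop body is not shown. A sketch of how the streamed chunks are presumably folded into the empty assistant message created above (a fragment under that assumption, not the committed code):

```python
# Continuation assumed, not visible in this hunk: append each delta to the
# last (assistant) message and push updates to the two side panels.
async for chunk in stream_answer(messages_for_llm, MODEL_ID, PROVIDER, HF_TOKEN):
    history_msgs[-1]["content"] += chunk.get("delta", "")
    yield (
        history_msgs,
        gr.update(value=chunk["tool_log"]),
        gr.update(value=chunk["citations"]),
    )
```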
@@ -312,10 +321,10 @@
 # ----------------------------
 # Gradio runtime (queue + launch)
 # ----------------------------
-# Assign back for older Gradio versions.
+# IMPORTANT: assign the queued app back to 'demo' for older Gradio versions.
 demo = demo.queue(max_size=32)
 
-# Always launch on Spaces (banner lines about "local URL" are normal).
+# Always launch; Spaces runs this script directly. Banner lines about "local URL" are normal.
 demo.launch(
-    ssr_mode=False  # if SSR gives you trouble; otherwise you can omit this
+    ssr_mode=False
 )
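Taken together, the runtime section boils down to the pattern below, shown standalone with names matching the diff. The committed comment says the `queue()` reassignment is needed on older Gradio versions, so the sketch keeps it; on current versions `queue()` returns the same Blocks either way:

```python
import gradio as gr

with gr.Blocks(fill_height=True) as demo:
    gr.Markdown("Hello from a minimal Space")

# Bounded queue: at most 32 requests wait before new ones are rejected.
demo = demo.queue(max_size=32)

# ssr_mode=False opts out of server-side rendering (a Gradio 5 launch flag).
demo.launch(ssr_mode=False)
```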
 