openfree committed on
Commit c3064c8 · verified · 1 Parent(s): 3d57ea2

Update app.py

Files changed (1)
  1. app.py +14 -86
app.py CHANGED
@@ -2,12 +2,10 @@ import os
 import time
 import gc
 import threading
-from itertools import islice
 from datetime import datetime
 import gradio as gr
 import torch
 from transformers import pipeline, TextIteratorStreamer
-from duckduckgo_search import DDGS
 import spaces  # Import spaces early to enable ZeroGPU support
 
 # ------------------------------
@@ -62,20 +60,6 @@ def load_pipeline(model_name):
     PIPELINES[model_name] = pipe
     return pipe
 
-
-def retrieve_context(query, max_results=6, max_chars=600):
-    """
-    Retrieve search snippets from DuckDuckGo (runs in background).
-    Returns a list of result strings.
-    """
-    try:
-        with DDGS() as ddgs:
-            return [f"{i+1}. {r.get('title','No Title')} - {r.get('body','')[:max_chars]}"
-                    for i, r in enumerate(islice(ddgs.text(query, region="wt-wt", safesearch="off", timelimit="y"), max_results))]
-    except Exception:
-        return []
-
-
 def format_conversation(history, system_prompt):
     """
     Flatten chat history and system prompt into a single string.
@@ -94,52 +78,20 @@ def format_conversation(history, system_prompt):
 
 @spaces.GPU(duration=60)
 def chat_response(user_msg, chat_history, system_prompt,
-                  enable_search, max_results, max_chars,
                   model_name, max_tokens, temperature,
                   top_k, top_p, repeat_penalty):
     """
-    Generates streaming chat responses, optionally with background web search.
+    Generates streaming chat responses.
     """
     cancel_event.clear()
     history = list(chat_history or [])
     history.append({'role': 'user', 'content': user_msg})
 
-    # Launch web search if enabled
-    debug = ''
-    search_results = []
-    if enable_search:
-        debug = 'Search task started.'
-        thread_search = threading.Thread(
-            target=lambda: search_results.extend(
-                retrieve_context(user_msg, int(max_results), int(max_chars))
-            )
-        )
-        thread_search.daemon = True
-        thread_search.start()
-    else:
-        debug = 'Web search disabled.'
-
     # Prepare assistant placeholder
     history.append({'role': 'assistant', 'content': ''})
 
     try:
-        # wait up to 1s for snippets, then replace debug with them
-        if enable_search:
-            thread_search.join(timeout=1.0)
-            if search_results:
-                debug = "### Search results merged into prompt\n\n" + "\n".join(
-                    f"- {r}" for r in search_results
-                )
-            else:
-                debug = "*No web search results found.*"
-
-        # merge fetched snippets into the system prompt
-        if search_results:
-            enriched = system_prompt.strip() + "\n\nRelevant context:\n" + "\n".join(search_results)
-        else:
-            enriched = system_prompt
-
-        prompt = format_conversation(history, enriched)
+        prompt = format_conversation(history, system_prompt)
 
         pipe = load_pipeline(model_name)
         streamer = TextIteratorStreamer(pipe.tokenizer,
@@ -166,30 +118,21 @@ def chat_response(user_msg, chat_history, system_prompt,
                 break
             assistant_text += chunk
             history[-1]['content'] = assistant_text
-            # Show debug only once
-            yield history, debug
+            yield history
         gen_thread.join()
     except Exception as e:
         history[-1]['content'] = f"Error: {e}"
-        yield history, debug
+        yield history
     finally:
         gc.collect()
 
-
 def cancel_generation():
     cancel_event.set()
     return 'Generation cancelled.'
 
-
-def update_default_prompt(enable_search):
+def get_default_system_prompt():
     today = datetime.now().strftime('%Y-%m-%d')
-    if enable_search:
-        return f"""You are Qwen3, a helpful and friendly AI assistant created by Alibaba Cloud.
-Today is {today}.
-You have access to real-time web search to provide the most up-to-date information.
-Be concise, accurate, and helpful. When appropriate, use information from the web search results provided to you."""
-    else:
-        return f"""You are Qwen3, a helpful and friendly AI assistant created by Alibaba Cloud.
+    return f"""You are Qwen3, a helpful and friendly AI assistant created by Alibaba Cloud.
 Today is {today}.
 Be concise, accurate, and helpful in your responses."""
 
@@ -214,12 +157,6 @@ css = """
     padding: 20px;
     margin-bottom: 20px;
 }
-.search-container {
-    background: #e9f0ff;
-    border-radius: 10px;
-    padding: 15px;
-    margin-bottom: 15px;
-}
 .controls-container {
     background: #f0f4fa;
     border-radius: 10px;
@@ -253,7 +190,7 @@ with gr.Blocks(title="Qwen3 Chat", css=css) as demo:
     gr.HTML("""
     <div class="qwen-header">
         <h1>🤖 Qwen3 Chat</h1>
-        <p>Interact with Alibaba Cloud's Qwen3 language models with optional web search capability</p>
+        <p>Interact with Alibaba Cloud's Qwen3 language models</p>
     </div>
     """)
 
@@ -267,16 +204,9 @@ with gr.Blocks(title="Qwen3 Chat", css=css) as demo:
                 elem_classes="model-select"
             )
 
-            with gr.Group(elem_classes="search-container"):
-                gr.Markdown("### 🔍 Search Settings")
-                search_chk = gr.Checkbox(label="Enable Web Search", value=True)
-                with gr.Row():
-                    mr = gr.Number(value=6, precision=0, label="Max Results")
-                    mc = gr.Number(value=600, precision=0, label="Max Chars/Result")
-
            with gr.Group(elem_classes="controls-container"):
                 gr.Markdown("### ⚙️ Generation Parameters")
-                sys_prompt = gr.Textbox(label="System Prompt", lines=5, value=update_default_prompt(True))
+                sys_prompt = gr.Textbox(label="System Prompt", lines=5, value=get_default_system_prompt())
                 with gr.Row():
                     max_tok = gr.Slider(64, 1024, value=512, step=32, label="Max Tokens")
                 with gr.Row():
@@ -298,7 +228,6 @@ with gr.Blocks(title="Qwen3 Chat", css=css) as demo:
                 lines=2,
                 show_label=False
             )
-            dbg = gr.Markdown(elem_classes="search-results")
 
     gr.HTML("""
     <div class="footer">
@@ -310,19 +239,18 @@ with gr.Blocks(title="Qwen3 Chat", css=css) as demo:
     def get_model_name(full_selection):
         return full_selection.split(" - ")[0]
 
-    search_chk.change(fn=update_default_prompt, inputs=search_chk, outputs=sys_prompt)
-    clr.click(fn=lambda: ([], "", ""), outputs=[chat, txt, dbg])
-    cnl.click(fn=cancel_generation, outputs=dbg)
+    clr.click(fn=lambda: ([], ""), outputs=[chat, txt])
+    cnl.click(fn=cancel_generation)
 
     txt.submit(
-        fn=lambda msg, history, prompt, search, mr, mc, model, tok, temp, k, p, rp:
+        fn=lambda msg, history, prompt, model, tok, temp, k, p, rp:
             chat_response(
-                msg, history, prompt, search, mr, mc,
+                msg, history, prompt,
                 get_model_name(model), tok, temp, k, p, rp
             ),
-        inputs=[txt, chat, sys_prompt, search_chk, mr, mc,
+        inputs=[txt, chat, sys_prompt,
                 model_dd, max_tok, temp, k, p, rp],
-        outputs=[chat, dbg],
+        outputs=[chat],
         show_progress=True
     )
 
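The generation path kept by this commit follows the standard transformers streaming pattern visible in the hunks above: run generation on a background thread, iterate a TextIteratorStreamer on the caller's side, and poll a threading.Event for cooperative cancellation (cancel_event / cancel_generation). The middle of chat_response is elided between hunks, so here is a minimal self-contained sketch of that pattern; the stream_reply helper and the default model name are illustrative, not taken from app.py:

from threading import Thread, Event
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

cancel_event = Event()

def stream_reply(prompt, model_name="Qwen/Qwen3-0.6B", max_new_tokens=256):
    """Yield the growing reply text, stopping early if cancel_event is set."""
    tok = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)
    streamer = TextIteratorStreamer(tok, skip_prompt=True, skip_special_tokens=True)
    inputs = tok(prompt, return_tensors="pt")
    # generate() blocks, so it runs on a worker thread and feeds the streamer
    gen = Thread(target=model.generate,
                 kwargs=dict(**inputs, max_new_tokens=max_new_tokens, streamer=streamer))
    gen.start()
    text = ""
    for chunk in streamer:            # yields decoded text pieces as they arrive
        if cancel_event.is_set():     # cooperative cancel, as in cancel_generation()
            break
        text += chunk
        yield text
    gen.join()

Setting cancel_event from another thread (a Gradio button callback here) breaks the loop on the next chunk, which is why the app clears the event at the start of every chat_response call.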
256