openfree committed on
Commit
2f0356e
·
verified ·
1 Parent(s): 92cc237

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +140 -56
app.py CHANGED
@@ -10,36 +10,19 @@ from transformers import pipeline, TextIteratorStreamer
10
  from duckduckgo_search import DDGS
11
  import spaces # Import spaces early to enable ZeroGPU support
12
 
13
- # Optional: Disable GPU visibility if you wish to force CPU usage
14
- # os.environ["CUDA_VISIBLE_DEVICES"] = ""
15
-
16
  # ------------------------------
17
  # Global Cancellation Event
18
  # ------------------------------
19
  cancel_event = threading.Event()
20
 
21
  # ------------------------------
22
- # Torch-Compatible Model Definitions with Adjusted Descriptions
23
  # ------------------------------
24
  MODELS = {
25
-
26
- "Qwen3-8B": {"repo_id": "Qwen/Qwen3-8B", "description": "Qwen3-8B"},
27
- "Qwen3-4B": {"repo_id": "Qwen/Qwen3-4B", "description": "Qwen3-4B"},
28
- "Qwen3-1.7B": {"repo_id": "Qwen/Qwen3-1,7B", "description": "Qwen3-1.7B"},
29
- "Qwen3-0.6B": {"repo_id": "Qwen/Qwen3-0.6B", "description": "Qwen3-0.6B"},
30
- "Gemma-3-4B-IT": {"repo_id": "unsloth/gemma-3-4b-it", "description": "Gemma-3-4B-IT"},
31
- "SmolLM2-135M-Instruct-TaiwanChat": {"repo_id": "Luigi/SmolLM2-135M-Instruct-TaiwanChat", "description": "SmolLM2‑135M Instruct fine-tuned on TaiwanChat"},
32
- "SmolLM2-135M-Instruct": {"repo_id": "HuggingFaceTB/SmolLM2-135M-Instruct", "description": "Original SmolLM2‑135M Instruct"},
33
- "SmolLM2-360M-Instruct-TaiwanChat": {"repo_id": "Luigi/SmolLM2-360M-Instruct-TaiwanChat", "description": "SmolLM2‑360M Instruct fine-tuned on TaiwanChat"},
34
- "Llama-3.2-Taiwan-3B-Instruct": {"repo_id": "lianghsun/Llama-3.2-Taiwan-3B-Instruct", "description": "Llama-3.2-Taiwan-3B-Instruct"},
35
- "MiniCPM3-4B": {"repo_id": "openbmb/MiniCPM3-4B", "description": "MiniCPM3-4B"},
36
- "Qwen2.5-3B-Instruct": {"repo_id": "Qwen/Qwen2.5-3B-Instruct", "description": "Qwen2.5-3B-Instruct"},
37
- "Qwen2.5-7B-Instruct": {"repo_id": "Qwen/Qwen2.5-7B-Instruct", "description": "Qwen2.5-7B-Instruct"},
38
- "Phi-4-mini-Instruct": {"repo_id": "unsloth/Phi-4-mini-instruct", "description": "Phi-4-mini-Instruct"},
39
- "Meta-Llama-3.1-8B-Instruct": {"repo_id": "MaziyarPanahi/Meta-Llama-3.1-8B-Instruct", "description": "Meta-Llama-3.1-8B-Instruct"},
40
- "DeepSeek-R1-Distill-Llama-8B": {"repo_id": "unsloth/DeepSeek-R1-Distill-Llama-8B", "description": "DeepSeek-R1-Distill-Llama-8B"},
41
- "Mistral-7B-Instruct-v0.3": {"repo_id": "MaziyarPanahi/Mistral-7B-Instruct-v0.3", "description": "Mistral-7B-Instruct-v0.3"},
42
- "Qwen2.5-Coder-7B-Instruct": {"repo_id": "Qwen/Qwen2.5-Coder-7B-Instruct", "description": "Qwen2.5-Coder-7B-Instruct"},
43
  }
44
 
45
  # Global cache for pipelines to avoid re-loading.
@@ -140,13 +123,6 @@ def chat_response(user_msg, chat_history, system_prompt,
140
  history.append({'role': 'assistant', 'content': ''})
141
 
142
  try:
143
-
144
- # merge any fetched search results into the system prompt
145
- if search_results:
146
- enriched = system_prompt.strip() + "\n\nRelevant context:\n" + "\n".join(search_results)
147
- else:
148
- enriched = system_prompt
149
-
150
  # wait up to 1s for snippets, then replace debug with them
151
  if enable_search:
152
  thread_search.join(timeout=1.0)
@@ -207,40 +183,148 @@ def cancel_generation():
207
 
208
  def update_default_prompt(enable_search):
209
  today = datetime.now().strftime('%Y-%m-%d')
210
- return f"You are a helpful assistant. Today is {today}."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
211
 
212
  # ------------------------------
213
  # Gradio UI
214
  # ------------------------------
215
- with gr.Blocks(title="LLM Inference with ZeroGPU") as demo:
216
- gr.Markdown("## 🧠 ZeroGPU LLM Inference with Web Search")
217
- gr.Markdown("Interact with the model. Select parameters and chat below.")
 
 
 
 
 
218
  with gr.Row():
219
  with gr.Column(scale=3):
220
- model_dd = gr.Dropdown(label="Select Model", choices=list(MODELS.keys()), value=list(MODELS.keys())[0])
221
- search_chk = gr.Checkbox(label="Enable Web Search", value=True)
222
- sys_prompt = gr.Textbox(label="System Prompt", lines=3, value=update_default_prompt(search_chk.value))
223
- gr.Markdown("### Generation Parameters")
224
- max_tok = gr.Slider(64, 1024, value=512, step=32, label="Max Tokens")
225
- temp = gr.Slider(0.1, 2.0, value=0.7, step=0.1, label="Temperature")
226
- k = gr.Slider(1, 100, value=40, step=1, label="Top-K")
227
- p = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-P")
228
- rp = gr.Slider(1.0, 2.0, value=1.1, step=0.1, label="Repetition Penalty")
229
- gr.Markdown("### Web Search Settings")
230
- mr = gr.Number(value=6, precision=0, label="Max Results")
231
- mc = gr.Number(value=600, precision=0, label="Max Chars/Result")
232
- clr = gr.Button("Clear Chat")
233
- cnl = gr.Button("Cancel Generation")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
234
  with gr.Column(scale=7):
235
- chat = gr.Chatbot(type="messages")
236
- txt = gr.Textbox(placeholder="Type your message and press Enter...")
237
- dbg = gr.Markdown()
238
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
239
  search_chk.change(fn=update_default_prompt, inputs=search_chk, outputs=sys_prompt)
240
  clr.click(fn=lambda: ([], "", ""), outputs=[chat, txt, dbg])
241
  cnl.click(fn=cancel_generation, outputs=dbg)
242
- txt.submit(fn=chat_response,
243
- inputs=[txt, chat, sys_prompt, search_chk, mr, mc,
244
- model_dd, max_tok, temp, k, p, rp],
245
- outputs=[chat, dbg])
246
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
10
  from duckduckgo_search import DDGS
11
  import spaces # Import spaces early to enable ZeroGPU support
12
 
 
 
 
13
  # ------------------------------
14
  # Global Cancellation Event
15
  # ------------------------------
16
  cancel_event = threading.Event()
17
 
18
  # ------------------------------
19
+ # Qwen3 Model Definitions
20
  # ------------------------------
21
  MODELS = {
22
+ "Qwen3-8B": {"repo_id": "Qwen/Qwen3-8B", "description": "Qwen3-8B - Largest model with highest capabilities"},
23
+ "Qwen3-4B": {"repo_id": "Qwen/Qwen3-4B", "description": "Qwen3-4B - Good balance of performance and efficiency"},
24
+ "Qwen3-1.7B": {"repo_id": "Qwen/Qwen3-1.7B", "description": "Qwen3-1.7B - Smaller model for faster responses"},
25
+ "Qwen3-0.6B": {"repo_id": "Qwen/Qwen3-0.6B", "description": "Qwen3-0.6B - Ultra-lightweight model"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  }
27
 
28
  # Global cache for pipelines to avoid re-loading.
 
123
  history.append({'role': 'assistant', 'content': ''})
124
 
125
  try:
 
 
 
 
 
 
 
126
  # wait up to 1s for snippets, then replace debug with them
127
  if enable_search:
128
  thread_search.join(timeout=1.0)
 
183
 
184
  def update_default_prompt(enable_search):
185
  today = datetime.now().strftime('%Y-%m-%d')
186
+ if enable_search:
187
+ return f"""You are Qwen3, a helpful and friendly AI assistant created by Alibaba Cloud.
188
+ Today is {today}.
189
+ You have access to real-time web search to provide the most up-to-date information.
190
+ Be concise, accurate, and helpful. When appropriate, use information from the web search results provided to you."""
191
+ else:
192
+ return f"""You are Qwen3, a helpful and friendly AI assistant created by Alibaba Cloud.
193
+ Today is {today}.
194
+ Be concise, accurate, and helpful in your responses."""
195
+
196
+ # CSS for improved visual style
197
+ css = """
198
+ .gradio-container {
199
+ background-color: #f5f7fb !important;
200
+ }
201
+ .qwen-header {
202
+ background: linear-gradient(90deg, #0099FF, #0066CC);
203
+ padding: 20px;
204
+ border-radius: 10px;
205
+ margin-bottom: 20px;
206
+ text-align: center;
207
+ color: white;
208
+ box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
209
+ }
210
+ .qwen-container {
211
+ border-radius: 10px;
212
+ box-shadow: 0 2px 4px rgba(0, 0, 0, 0.05);
213
+ background: white;
214
+ padding: 20px;
215
+ margin-bottom: 20px;
216
+ }
217
+ .search-container {
218
+ background: #e9f0ff;
219
+ border-radius: 10px;
220
+ padding: 15px;
221
+ margin-bottom: 15px;
222
+ }
223
+ .controls-container {
224
+ background: #f0f4fa;
225
+ border-radius: 10px;
226
+ padding: 15px;
227
+ margin-bottom: 15px;
228
+ }
229
+ .model-select {
230
+ border: 2px solid #0099FF !important;
231
+ border-radius: 8px !important;
232
+ }
233
+ .button-primary {
234
+ background-color: #0099FF !important;
235
+ color: white !important;
236
+ }
237
+ .button-secondary {
238
+ background-color: #6c757d !important;
239
+ color: white !important;
240
+ }
241
+ .footer {
242
+ text-align: center;
243
+ margin-top: 20px;
244
+ font-size: 0.8em;
245
+ color: #666;
246
+ }
247
+ """
248
 
249
  # ------------------------------
250
  # Gradio UI
251
  # ------------------------------
252
+ with gr.Blocks(title="Qwen3 Chat", css=css) as demo:
253
+ gr.HTML("""
254
+ <div class="qwen-header">
255
+ <h1>🤖 Qwen3 Chat</h1>
256
+ <p>Interact with Alibaba Cloud's Qwen3 language models with optional web search capability</p>
257
+ </div>
258
+ """)
259
+
260
  with gr.Row():
261
  with gr.Column(scale=3):
262
+ with gr.Box(elem_classes="qwen-container"):
263
+ model_dd = gr.Dropdown(
264
+ label="Select Qwen3 Model",
265
+ choices=[f"{k} - {v['description']}" for k, v in MODELS.items()],
266
+ value=f"{list(MODELS.keys())[0]} - {MODELS[list(MODELS.keys())[0]]['description']}",
267
+ elem_classes="model-select"
268
+ )
269
+
270
+ with gr.Box(elem_classes="search-container"):
271
+ gr.Markdown("### 🔍 Search Settings")
272
+ search_chk = gr.Checkbox(label="Enable Web Search", value=True)
273
+ with gr.Row():
274
+ mr = gr.Number(value=6, precision=0, label="Max Results", min_value=1, max_value=10)
275
+ mc = gr.Number(value=600, precision=0, label="Max Chars/Result", min_value=100, max_value=1000)
276
+
277
+ with gr.Box(elem_classes="controls-container"):
278
+ gr.Markdown("### ⚙️ Generation Parameters")
279
+ sys_prompt = gr.Textbox(label="System Prompt", lines=5, value=update_default_prompt(True))
280
+ with gr.Row():
281
+ max_tok = gr.Slider(64, 1024, value=512, step=32, label="Max Tokens")
282
+ with gr.Row():
283
+ temp = gr.Slider(0.1, 2.0, value=0.7, step=0.1, label="Temperature")
284
+ p = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-P")
285
+ with gr.Row():
286
+ k = gr.Slider(1, 100, value=40, step=1, label="Top-K")
287
+ rp = gr.Slider(1.0, 2.0, value=1.1, step=0.1, label="Repetition Penalty")
288
+
289
+ with gr.Row():
290
+ clr = gr.Button("Clear Chat", elem_classes="button-secondary")
291
+ cnl = gr.Button("Cancel Generation", elem_classes="button-secondary")
292
+
293
  with gr.Column(scale=7):
294
+ chat = gr.Chatbot(type="messages", height=500)
295
+ with gr.Row():
296
+ txt = gr.Textbox(
297
+ placeholder="Type your message and press Enter...",
298
+ lines=2,
299
+ show_label=False
300
+ )
301
+ dbg = gr.Markdown(elem_classes="search-results")
302
+
303
+ gr.HTML("""
304
+ <div class="footer">
305
+ <p>Qwen3 models developed by Alibaba Cloud. Interface powered by Gradio and ZeroGPU.</p>
306
+ </div>
307
+ """)
308
+
309
+ # Extract just the model name from the dropdown selection
310
+ def get_model_name(full_selection):
311
+ return full_selection.split(" - ")[0]
312
+
313
  search_chk.change(fn=update_default_prompt, inputs=search_chk, outputs=sys_prompt)
314
  clr.click(fn=lambda: ([], "", ""), outputs=[chat, txt, dbg])
315
  cnl.click(fn=cancel_generation, outputs=dbg)
316
+
317
+ txt.submit(
318
+ fn=lambda msg, history, prompt, search, mr, mc, model, tok, temp, k, p, rp:
319
+ chat_response(
320
+ msg, history, prompt, search, mr, mc,
321
+ get_model_name(model), tok, temp, k, p, rp
322
+ ),
323
+ inputs=[txt, chat, sys_prompt, search_chk, mr, mc,
324
+ model_dd, max_tok, temp, k, p, rp],
325
+ outputs=[chat, dbg],
326
+ show_progress=True
327
+ )
328
+
329
+ if __name__ == "__main__":
330
+ demo.launch()