Spaces: Running on Zero
Update app.py
app.py
CHANGED
@@ -2,12 +2,10 @@ import os
 import time
 import gc
 import threading
-from itertools import islice
 from datetime import datetime
 import gradio as gr
 import torch
 from transformers import pipeline, TextIteratorStreamer
-from duckduckgo_search import DDGS
 import spaces  # Import spaces early to enable ZeroGPU support
 
 # ------------------------------
@@ -62,20 +60,6 @@ def load_pipeline(model_name):
     PIPELINES[model_name] = pipe
     return pipe
 
-
-def retrieve_context(query, max_results=6, max_chars=600):
-    """
-    Retrieve search snippets from DuckDuckGo (runs in background).
-    Returns a list of result strings.
-    """
-    try:
-        with DDGS() as ddgs:
-            return [f"{i+1}. {r.get('title','No Title')} - {r.get('body','')[:max_chars]}"
-                    for i, r in enumerate(islice(ddgs.text(query, region="wt-wt", safesearch="off", timelimit="y"), max_results))]
-    except Exception:
-        return []
-
-
 def format_conversation(history, system_prompt):
     """
     Flatten chat history and system prompt into a single string.
@@ -94,52 +78,20 @@ def format_conversation(history, system_prompt):
 
 @spaces.GPU(duration=60)
 def chat_response(user_msg, chat_history, system_prompt,
-                  enable_search, max_results, max_chars,
                   model_name, max_tokens, temperature,
                   top_k, top_p, repeat_penalty):
     """
-    Generates streaming chat responses
+    Generates streaming chat responses.
     """
     cancel_event.clear()
     history = list(chat_history or [])
     history.append({'role': 'user', 'content': user_msg})
 
-    # Launch web search if enabled
-    debug = ''
-    search_results = []
-    if enable_search:
-        debug = 'Search task started.'
-        thread_search = threading.Thread(
-            target=lambda: search_results.extend(
-                retrieve_context(user_msg, int(max_results), int(max_chars))
-            )
-        )
-        thread_search.daemon = True
-        thread_search.start()
-    else:
-        debug = 'Web search disabled.'
-
     # Prepare assistant placeholder
     history.append({'role': 'assistant', 'content': ''})
 
     try:
-
-        if enable_search:
-            thread_search.join(timeout=1.0)
-            if search_results:
-                debug = "### Search results merged into prompt\n\n" + "\n".join(
-                    f"- {r}" for r in search_results
-                )
-            else:
-                debug = "*No web search results found.*"
-
-        # merge fetched snippets into the system prompt
-        if search_results:
-            enriched = system_prompt.strip() + "\n\nRelevant context:\n" + "\n".join(search_results)
-        else:
-            enriched = system_prompt
-
-        prompt = format_conversation(history, enriched)
+        prompt = format_conversation(history, system_prompt)
 
         pipe = load_pipeline(model_name)
         streamer = TextIteratorStreamer(pipe.tokenizer,
@@ -166,30 +118,21 @@ def chat_response(user_msg, chat_history, system_prompt,
                 break
             assistant_text += chunk
             history[-1]['content'] = assistant_text
-
-            yield history, debug
+            yield history
         gen_thread.join()
     except Exception as e:
         history[-1]['content'] = f"Error: {e}"
-        yield history
+        yield history
     finally:
         gc.collect()
 
-
 def cancel_generation():
     cancel_event.set()
     return 'Generation cancelled.'
 
-
-def update_default_prompt(enable_search):
+def get_default_system_prompt():
     today = datetime.now().strftime('%Y-%m-%d')
-    if enable_search:
-        return f"""You are Qwen3, a helpful and friendly AI assistant created by Alibaba Cloud.
-Today is {today}.
-You have access to real-time web search to provide the most up-to-date information.
-Be concise, accurate, and helpful. When appropriate, use information from the web search results provided to you."""
-    else:
-        return f"""You are Qwen3, a helpful and friendly AI assistant created by Alibaba Cloud.
+    return f"""You are Qwen3, a helpful and friendly AI assistant created by Alibaba Cloud.
 Today is {today}.
 Be concise, accurate, and helpful in your responses."""
 
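The hunks above only show fragments of the streaming path: the `TextIteratorStreamer` constructor is cut off mid-call, and `gen_thread` is created outside the changed lines. For orientation, a minimal, self-contained sketch of the pattern the handler appears to use (background generation thread plus streamer iteration); the checkpoint name, prompt, and sampling values below are illustrative assumptions, not taken from the Space:

import threading
from transformers import pipeline, TextIteratorStreamer

# Sketch of the streaming pattern chat_response appears to use, not the app's exact code:
# generation runs in a background thread while the streamer is consumed incrementally.
pipe = pipeline("text-generation", model="Qwen/Qwen3-0.6B")  # assumed checkpoint, for illustration
streamer = TextIteratorStreamer(pipe.tokenizer, skip_prompt=True, skip_special_tokens=True)

prompt = "System: You are a helpful assistant.\nUser: Hello!\nAssistant:"  # illustrative prompt string
gen_thread = threading.Thread(
    target=pipe,
    args=(prompt,),
    kwargs=dict(max_new_tokens=64, do_sample=True, temperature=0.7, streamer=streamer),
)
gen_thread.start()

assistant_text = ""
for chunk in streamer:      # yields decoded text pieces as soon as they are generated
    assistant_text += chunk
    print(chunk, end="", flush=True)
gen_thread.join()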
@@ -214,12 +157,6 @@ css = """
     padding: 20px;
     margin-bottom: 20px;
 }
-.search-container {
-    background: #e9f0ff;
-    border-radius: 10px;
-    padding: 15px;
-    margin-bottom: 15px;
-}
 .controls-container {
     background: #f0f4fa;
     border-radius: 10px;
@@ -253,7 +190,7 @@ with gr.Blocks(title="Qwen3 Chat", css=css) as demo:
     gr.HTML("""
     <div class="qwen-header">
         <h1>🤖 Qwen3 Chat</h1>
-        <p>Interact with Alibaba Cloud's Qwen3 language models
+        <p>Interact with Alibaba Cloud's Qwen3 language models</p>
     </div>
     """)
 
@@ -267,16 +204,9 @@ with gr.Blocks(title="Qwen3 Chat", css=css) as demo:
                 elem_classes="model-select"
             )
 
-            with gr.Group(elem_classes="search-container"):
-                gr.Markdown("### 🔍 Search Settings")
-                search_chk = gr.Checkbox(label="Enable Web Search", value=True)
-                with gr.Row():
-                    mr = gr.Number(value=6, precision=0, label="Max Results")
-                    mc = gr.Number(value=600, precision=0, label="Max Chars/Result")
-
            with gr.Group(elem_classes="controls-container"):
                 gr.Markdown("### ⚙️ Generation Parameters")
-                sys_prompt = gr.Textbox(label="System Prompt", lines=5, value=
+                sys_prompt = gr.Textbox(label="System Prompt", lines=5, value=get_default_system_prompt())
                 with gr.Row():
                     max_tok = gr.Slider(64, 1024, value=512, step=32, label="Max Tokens")
                 with gr.Row():
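A side note on the new default, not something the commit itself changes: `value=get_default_system_prompt()` is evaluated once when the Blocks are built, so the `{today}` date in the default prompt is frozen at the moment the Space starts. Gradio also accepts a callable as a component's `value`, re-evaluated on each page load, which would keep the date current on a long-running Space; a minimal variant:

# Passing the function itself (no call) makes Gradio re-evaluate the default on each page load.
sys_prompt = gr.Textbox(label="System Prompt", lines=5, value=get_default_system_prompt)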
@@ -298,7 +228,6 @@ with gr.Blocks(title="Qwen3 Chat", css=css) as demo:
                 lines=2,
                 show_label=False
             )
-            dbg = gr.Markdown(elem_classes="search-results")
 
     gr.HTML("""
     <div class="footer">
@@ -310,19 +239,18 @@ with gr.Blocks(title="Qwen3 Chat", css=css) as demo:
     def get_model_name(full_selection):
         return full_selection.split(" - ")[0]
 
-
-
-    cnl.click(fn=cancel_generation, outputs=dbg)
+    clr.click(fn=lambda: ([], ""), outputs=[chat, txt])
+    cnl.click(fn=cancel_generation)
 
     txt.submit(
-        fn=lambda msg, history, prompt,
+        fn=lambda msg, history, prompt, model, tok, temp, k, p, rp:
             chat_response(
-                msg, history, prompt,
+                msg, history, prompt,
                 get_model_name(model), tok, temp, k, p, rp
             ),
-        inputs=[txt, chat, sys_prompt,
+        inputs=[txt, chat, sys_prompt,
                 model_dd, max_tok, temp, k, p, rp],
-        outputs=[chat
+        outputs=[chat],
        show_progress=True
     )
 
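On the cancel path: `cnl.click(fn=cancel_generation)` only sets the module-level `threading.Event`, and the streaming loop above presumably checks it before its `break` (the guard itself sits outside the changed lines). A minimal sketch of that cooperative-cancellation wiring, with the UI reduced to the pieces visible in this diff; the button label, the stand-in streaming function, and the status output are assumptions for illustration:

import threading
import gradio as gr

cancel_event = threading.Event()

def cancel_generation():
    cancel_event.set()
    return 'Generation cancelled.'

def stream_reply(user_msg, history):
    # Stand-in for chat_response: emit partial histories until cancelled.
    cancel_event.clear()
    history = list(history or [])
    history.append({'role': 'user', 'content': user_msg})
    history.append({'role': 'assistant', 'content': ''})
    for i in range(50):
        if cancel_event.is_set():  # same guard the real loop presumably uses before `break`
            break
        history[-1]['content'] += f"token{i} "
        yield history

with gr.Blocks() as demo:
    chat = gr.Chatbot(type="messages")
    txt = gr.Textbox(show_label=False)
    cnl = gr.Button("Cancel")   # hypothetical label; the diff only shows the variable name cnl
    status = gr.Markdown()      # gives the returned status string somewhere to land
    txt.submit(fn=stream_reply, inputs=[txt, chat], outputs=[chat])
    cnl.click(fn=cancel_generation, outputs=status)

if __name__ == "__main__":
    demo.launch()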