Qwen3-1_7B

Running on Zero

App Files Files Community

openfree committed on Apr 29

Commit

2f0356e

verified ·

1 Parent(s): 92cc237

Update app.py

Browse files

Files changed (1) hide show

app.py +140 -56

app.py CHANGED Viewed

@@ -10,36 +10,19 @@ from transformers import pipeline, TextIteratorStreamer
 from duckduckgo_search import DDGS
 import spaces  # Import spaces early to enable ZeroGPU support
-# Optional: Disable GPU visibility if you wish to force CPU usage
-# os.environ["CUDA_VISIBLE_DEVICES"] = ""
 # ------------------------------
 # Global Cancellation Event
 # ------------------------------
 cancel_event = threading.Event()
 # ------------------------------
-# Torch-Compatible Model Definitions with Adjusted Descriptions
 # ------------------------------
 MODELS = {
-    "Qwen3-8B": {"repo_id": "Qwen/Qwen3-8B", "description": "Qwen3-8B"},
-    "Qwen3-4B": {"repo_id": "Qwen/Qwen3-4B", "description": "Qwen3-4B"},
-    "Qwen3-1.7B": {"repo_id": "Qwen/Qwen3-1,7B", "description": "Qwen3-1.7B"},
-    "Qwen3-0.6B": {"repo_id": "Qwen/Qwen3-0.6B", "description": "Qwen3-0.6B"},
-    "Gemma-3-4B-IT": {"repo_id": "unsloth/gemma-3-4b-it", "description": "Gemma-3-4B-IT"},
-    "SmolLM2-135M-Instruct-TaiwanChat": {"repo_id": "Luigi/SmolLM2-135M-Instruct-TaiwanChat", "description": "SmolLM2‑135M Instruct fine-tuned on TaiwanChat"},
-    "SmolLM2-135M-Instruct": {"repo_id": "HuggingFaceTB/SmolLM2-135M-Instruct", "description": "Original SmolLM2‑135M Instruct"},
-    "SmolLM2-360M-Instruct-TaiwanChat": {"repo_id": "Luigi/SmolLM2-360M-Instruct-TaiwanChat", "description": "SmolLM2‑360M Instruct fine-tuned on TaiwanChat"},
-    "Llama-3.2-Taiwan-3B-Instruct": {"repo_id": "lianghsun/Llama-3.2-Taiwan-3B-Instruct", "description": "Llama-3.2-Taiwan-3B-Instruct"},
-    "MiniCPM3-4B": {"repo_id": "openbmb/MiniCPM3-4B", "description": "MiniCPM3-4B"},
-    "Qwen2.5-3B-Instruct": {"repo_id": "Qwen/Qwen2.5-3B-Instruct", "description": "Qwen2.5-3B-Instruct"},
-    "Qwen2.5-7B-Instruct": {"repo_id": "Qwen/Qwen2.5-7B-Instruct", "description": "Qwen2.5-7B-Instruct"},
-    "Phi-4-mini-Instruct": {"repo_id": "unsloth/Phi-4-mini-instruct", "description": "Phi-4-mini-Instruct"},
-    "Meta-Llama-3.1-8B-Instruct": {"repo_id": "MaziyarPanahi/Meta-Llama-3.1-8B-Instruct", "description": "Meta-Llama-3.1-8B-Instruct"},
-    "DeepSeek-R1-Distill-Llama-8B": {"repo_id": "unsloth/DeepSeek-R1-Distill-Llama-8B", "description": "DeepSeek-R1-Distill-Llama-8B"},
-    "Mistral-7B-Instruct-v0.3": {"repo_id": "MaziyarPanahi/Mistral-7B-Instruct-v0.3", "description": "Mistral-7B-Instruct-v0.3"},
-    "Qwen2.5-Coder-7B-Instruct": {"repo_id": "Qwen/Qwen2.5-Coder-7B-Instruct", "description": "Qwen2.5-Coder-7B-Instruct"},
 }
 # Global cache for pipelines to avoid re-loading.
@@ -140,13 +123,6 @@ def chat_response(user_msg, chat_history, system_prompt,
     history.append({'role': 'assistant', 'content': ''})
     try:
-        # merge any fetched search results into the system prompt
-        if search_results:
-            enriched = system_prompt.strip() + "\n\nRelevant context:\n" + "\n".join(search_results)
-        else:
-            enriched = system_prompt
         # wait up to 1s for snippets, then replace debug with them
         if enable_search:
             thread_search.join(timeout=1.0)
@@ -207,40 +183,148 @@ def cancel_generation():
 def update_default_prompt(enable_search):
     today = datetime.now().strftime('%Y-%m-%d')
-    return f"You are a helpful assistant. Today is {today}."
 # ------------------------------
 # Gradio UI
 # ------------------------------
-with gr.Blocks(title="LLM Inference with ZeroGPU") as demo:
-    gr.Markdown("## 🧠 ZeroGPU LLM Inference with Web Search")
-    gr.Markdown("Interact with the model. Select parameters and chat below.")
     with gr.Row():
         with gr.Column(scale=3):
-            model_dd = gr.Dropdown(label="Select Model", choices=list(MODELS.keys()), value=list(MODELS.keys())[0])
-            search_chk = gr.Checkbox(label="Enable Web Search", value=True)
-            sys_prompt = gr.Textbox(label="System Prompt", lines=3, value=update_default_prompt(search_chk.value))
-            gr.Markdown("### Generation Parameters")
-            max_tok = gr.Slider(64, 1024, value=512, step=32, label="Max Tokens")
-            temp = gr.Slider(0.1, 2.0, value=0.7, step=0.1, label="Temperature")
-            k = gr.Slider(1, 100, value=40, step=1, label="Top-K")
-            p = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-P")
-            rp = gr.Slider(1.0, 2.0, value=1.1, step=0.1, label="Repetition Penalty")
-            gr.Markdown("### Web Search Settings")
-            mr = gr.Number(value=6, precision=0, label="Max Results")
-            mc = gr.Number(value=600, precision=0, label="Max Chars/Result")
-            clr = gr.Button("Clear Chat")
-            cnl = gr.Button("Cancel Generation")
         with gr.Column(scale=7):
-            chat = gr.Chatbot(type="messages")
-            txt = gr.Textbox(placeholder="Type your message and press Enter...")
-            dbg = gr.Markdown()
     search_chk.change(fn=update_default_prompt, inputs=search_chk, outputs=sys_prompt)
     clr.click(fn=lambda: ([], "", ""), outputs=[chat, txt, dbg])
     cnl.click(fn=cancel_generation, outputs=dbg)
-    txt.submit(fn=chat_response,
-               inputs=[txt, chat, sys_prompt, search_chk, mr, mc,
-                       model_dd, max_tok, temp, k, p, rp],
-               outputs=[chat, dbg])
-    demo.launch()

 from duckduckgo_search import DDGS
 import spaces  # Import spaces early to enable ZeroGPU support
 # ------------------------------
 # Global Cancellation Event
 # ------------------------------
 cancel_event = threading.Event()
 # ------------------------------
+# Qwen3 Model Definitions
 # ------------------------------
 MODELS = {
+    # One entry per Qwen3 checkpoint size. The key is the name shown in the
+    # UI, "repo_id" names the model repository (presumably on the Hugging
+    # Face Hub), and "description" is the text appended to the dropdown label.
+    f"Qwen3-{size}": {
+        "repo_id": f"Qwen/Qwen3-{size}",
+        "description": f"Qwen3-{size} - {blurb}",
+    }
+    for size, blurb in (
+        ("8B", "Largest model with highest capabilities"),
+        ("4B", "Good balance of performance and efficiency"),
+        ("1.7B", "Smaller model for faster responses"),
+        ("0.6B", "Ultra-lightweight model"),
+    )
 }
 # Global cache for pipelines to avoid re-loading.
     history.append({'role': 'assistant', 'content': ''})
     try:
         # wait up to 1s for snippets, then replace debug with them
         if enable_search:
             thread_search.join(timeout=1.0)
 def update_default_prompt(enable_search):
+    """Build the default system prompt for the current web-search setting.
+
+    Args:
+        enable_search: Truthy when web search is enabled; the prompt then
+            mentions search access and asks the model to use the results.
+
+    Returns:
+        A multi-line prompt string that embeds today's date as YYYY-MM-DD.
+    """
     today = datetime.now().strftime('%Y-%m-%d')
+    # Both variants open with the same identity and date lines; only the
+    # closing guidance depends on whether search is enabled.
+    prompt_lines = [
+        "You are Qwen3, a helpful and friendly AI assistant created by Alibaba Cloud.",
+        f"Today is {today}.",
+    ]
+    if not enable_search:
+        prompt_lines.append("Be concise, accurate, and helpful in your responses.")
+    else:
+        prompt_lines.append("You have access to real-time web search to provide the most up-to-date information.")
+        prompt_lines.append("Be concise, accurate, and helpful. When appropriate, use information from the web search results provided to you.")
+    return "\n".join(prompt_lines)
+# Stylesheet passed to gr.Blocks(css=...); its rules target the elem_classes names and HTML class attributes used by the UI.
+css = """
+.gradio-container {
+    background-color: #f5f7fb !important;
+}
+.qwen-header {
+    background: linear-gradient(90deg, #0099FF, #0066CC);
+    padding: 20px;
+    border-radius: 10px;
+    margin-bottom: 20px;
+    text-align: center;
+    color: white;
+    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
+}
+.qwen-container {
+    border-radius: 10px;
+    box-shadow: 0 2px 4px rgba(0, 0, 0, 0.05);
+    background: white;
+    padding: 20px;
+    margin-bottom: 20px;
+}
+.search-container {
+    background: #e9f0ff;
+    border-radius: 10px;
+    padding: 15px;
+    margin-bottom: 15px;
+}
+.controls-container {
+    background: #f0f4fa;
+    border-radius: 10px;
+    padding: 15px;
+    margin-bottom: 15px;
+}
+.model-select {
+    border: 2px solid #0099FF !important;
+    border-radius: 8px !important;
+}
+.button-primary {
+    background-color: #0099FF !important;
+    color: white !important;
+}
+.button-secondary {
+    background-color: #6c757d !important;
+    color: white !important;
+}
+.footer {
+    text-align: center;
+    margin-top: 20px;
+    font-size: 0.8em;
+    color: #666;
+}
+"""
 # ------------------------------
 # Gradio UI
 # ------------------------------
+with gr.Blocks(title="Qwen3 Chat", css=css) as demo:
+    gr.HTML("""
+    <div class="qwen-header">
+        <h1>🤖 Qwen3 Chat</h1>
+        <p>Interact with Alibaba Cloud's Qwen3 language models with optional web search capability</p>
+    </div>
+    """)
+    # Dropdown entries read "<model key> - <description>"; get_model_name()
+    # below recovers the key before the selection reaches chat_response.
+    model_choices = [f"{name} - {info['description']}" for name, info in MODELS.items()]
     with gr.Row():
         with gr.Column(scale=3):
+            # gr.Group is used for the styled panels: gr.Box was removed in
+            # Gradio 4, so it is not available in the Gradio versions that
+            # support gr.Chatbot(type="messages") used below.
+            with gr.Group(elem_classes="qwen-container"):
+                model_dd = gr.Dropdown(
+                    label="Select Qwen3 Model",
+                    choices=model_choices,
+                    value=model_choices[0],
+                    elem_classes="model-select"
+                )
+            with gr.Group(elem_classes="search-container"):
+                gr.Markdown("### 🔍 Search Settings")
+                search_chk = gr.Checkbox(label="Enable Web Search", value=True)
+                with gr.Row():
+                    # gr.Number takes its bounds as minimum/maximum.
+                    mr = gr.Number(value=6, precision=0, label="Max Results", minimum=1, maximum=10)
+                    mc = gr.Number(value=600, precision=0, label="Max Chars/Result", minimum=100, maximum=1000)
+            with gr.Group(elem_classes="controls-container"):
+                gr.Markdown("### ⚙️ Generation Parameters")
+                # Initial prompt matches the checkbox default (search enabled).
+                sys_prompt = gr.Textbox(label="System Prompt", lines=5, value=update_default_prompt(True))
+                with gr.Row():
+                    max_tok = gr.Slider(64, 1024, value=512, step=32, label="Max Tokens")
+                with gr.Row():
+                    temp = gr.Slider(0.1, 2.0, value=0.7, step=0.1, label="Temperature")
+                    p = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-P")
+                with gr.Row():
+                    k = gr.Slider(1, 100, value=40, step=1, label="Top-K")
+                    rp = gr.Slider(1.0, 2.0, value=1.1, step=0.1, label="Repetition Penalty")
+                with gr.Row():
+                    clr = gr.Button("Clear Chat", elem_classes="button-secondary")
+                    cnl = gr.Button("Cancel Generation", elem_classes="button-secondary")
         with gr.Column(scale=7):
+            chat = gr.Chatbot(type="messages", height=500)
+            with gr.Row():
+                txt = gr.Textbox(
+                    placeholder="Type your message and press Enter...",
+                    lines=2,
+                    show_label=False
+                )
+            dbg = gr.Markdown(elem_classes="search-results")
+    gr.HTML("""
+    <div class="footer">
+        <p>Qwen3 models developed by Alibaba Cloud. Interface powered by Gradio and ZeroGPU.</p>
+    </div>
+    """)
+    # Extract just the model name from the dropdown selection
+    def get_model_name(full_selection):
+        """Return the MODELS key from a "<key> - <description>" dropdown entry."""
+        return full_selection.split(" - ")[0]
+    def respond(msg, history, prompt, search, mr, mc, model, tok, temp, k, p, rp):
+        """Forward a submission to chat_response using the bare model key.
+
+        This is a generator function so that Gradio can stream updates when
+        chat_response yields; a lambda that merely returns a generator object
+        would be treated as a single return value instead.
+        """
+        result = chat_response(
+            msg, history, prompt, search, mr, mc,
+            get_model_name(model), tok, temp, k, p, rp
+        )
+        # NOTE(review): chat_response is defined outside this view; support
+        # both a streaming iterator and a plain (history, debug) return value.
+        if hasattr(result, "__next__"):
+            yield from result
+        else:
+            yield result
     search_chk.change(fn=update_default_prompt, inputs=search_chk, outputs=sys_prompt)
     clr.click(fn=lambda: ([], "", ""), outputs=[chat, txt, dbg])
     cnl.click(fn=cancel_generation, outputs=dbg)
+    txt.submit(
+        fn=respond,
+        inputs=[txt, chat, sys_prompt, search_chk, mr, mc,
+                model_dd, max_tok, temp, k, p, rp],
+        outputs=[chat, dbg],
+        show_progress=True
+    )
+if __name__ == "__main__":
+    demo.launch()