Update app.py
app.py CHANGED
@@ -5,7 +5,7 @@ import urllib.request
 from huggingface_hub import snapshot_download
 
 # --- Configuration (overridable via environment variables) ---
-MODEL_REPO = os.getenv("MODEL_REPO", "
+MODEL_REPO = os.getenv("MODEL_REPO", "lastmass/Qwen3_Medical_GRPO")
 MODEL_FILE = os.getenv("MODEL_FILE", "Qwen3_Medical_GRPO.i1-Q4_K_M.gguf")
 MODEL_DIR = os.getenv("MODEL_DIR", "/models")
 MODEL_PATH = os.path.join(MODEL_DIR, MODEL_FILE)

@@ -117,7 +117,7 @@ Then, provide your solution between <SOLUTION></SOLUTION>"""
 def chat(user_input):
     try:
         prompt = system_prompt + "\n\nUser input: " + user_input + " <start_working_out>"
-        response = llm(prompt, max_tokens=
+        response = llm(prompt, max_tokens=1024, temperature=0.7)  # changed to 1024
         return response["choices"][0]["text"]
     except Exception as e:
         err_msg = f"Error while generating: {e}"

@@ -125,13 +125,33 @@ def chat(user_input):
         return err_msg
 
 with gr.Blocks() as demo:
-    gr.Markdown("#
+    gr.Markdown("# 🏥 Qwen3 Medical GGUF Demo")
+
+    # Add warning and explanatory notes
     with gr.Row():
         with gr.Column():
-
-
+            gr.HTML("""
+            <div style="background-color: #fff3cd; border: 1px solid #ffeaa7; border-radius: 5px; padding: 15px; margin-bottom: 20px;">
+                <h3 style="color: #856404; margin-top: 0;">⚠️ Performance Notice</h3>
+                <p style="color: #856404; margin-bottom: 10px;">
+                    This demo runs the <strong>lastmass/Qwen3_Medical_GRPO</strong> model (Q4_K_M quantized version)
+                    on Hugging Face's free CPU hardware. Inference is <strong>very slow</strong>.
+                </p>
+                <p style="color: #856404; margin-bottom: 0;">
+                    For better performance, we recommend running inference <strong>locally</strong> with GPU acceleration.
+                    Please refer to the <a href="https://huggingface.co/lastmass/Qwen3_Medical_GRPO" target="_blank">model repository</a>
+                    for usage instructions. For optimal performance, use <strong>vLLM</strong> for inference.
+                </p>
+            </div>
+            """)
+
+    with gr.Row():
+        with gr.Column():
+            input_box = gr.Textbox(label="Input your question", placeholder="Please enter your medical question...")
+            submit_btn = gr.Button("Generate Response")
         with gr.Column():
-            output_box = gr.Textbox(label="
+            output_box = gr.Textbox(label="Model Response", lines=10)
+
     submit_btn.click(fn=chat, inputs=input_box, outputs=output_box)
 
-demo.launch(server_name="0.0.0.0", server_port=7860)
+demo.launch(server_name="0.0.0.0", server_port=7860)
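
The performance notice added in this commit recommends running inference locally instead of on the free CPU Space. Below is a minimal sketch of what that could look like with llama-cpp-python, matching the `llm(prompt, max_tokens=..., temperature=...)` call style used in `chat()`. The repo id and filename mirror the Space's defaults; `n_ctx`, `n_gpu_layers`, and the example prompt are illustrative assumptions, and the real app also prepends a `system_prompt` that this diff does not show.

from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Fetch the quantized weights; repo id and filename mirror the Space's defaults.
model_path = hf_hub_download(
    repo_id="lastmass/Qwen3_Medical_GRPO",
    filename="Qwen3_Medical_GRPO.i1-Q4_K_M.gguf",
)

# n_gpu_layers=-1 offloads all layers to the GPU when one is available;
# n_ctx=4096 is an illustrative context size, not a value taken from the Space.
llm = Llama(model_path=model_path, n_ctx=4096, n_gpu_layers=-1)

# The Space also prepends a system_prompt that is not visible in this diff.
prompt = "User input: What are common causes of chest pain? <start_working_out>"
response = llm(prompt, max_tokens=1024, temperature=0.7)
print(response["choices"][0]["text"])

The notice also names vLLM as the fastest option; that route would typically serve the repository's full-precision weights rather than this Q4_K_M GGUF file.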