Update app.py
app.py CHANGED
@@ -5,7 +5,7 @@ import urllib.request
 from huggingface_hub import snapshot_download
 
 # --- Configuration (overridable via environment variables) ---
-MODEL_REPO = os.getenv("MODEL_REPO", "
+MODEL_REPO = os.getenv("MODEL_REPO", "lastmass/Qwen3_Medical_GRPO")
 MODEL_FILE = os.getenv("MODEL_FILE", "Qwen3_Medical_GRPO.i1-Q4_K_M.gguf")
 MODEL_DIR = os.getenv("MODEL_DIR", "/models")
 MODEL_PATH = os.path.join(MODEL_DIR, MODEL_FILE)

@@ -117,7 +117,7 @@ Then, provide your solution between <SOLUTION></SOLUTION>"""
 def chat(user_input):
     try:
         prompt = system_prompt + "\n\nUser input: " + user_input + " <start_working_out>"
-        response = llm(prompt, max_tokens=
+        response = llm(prompt, max_tokens=1024, temperature=0.7)  # changed to 1024
         return response["choices"][0]["text"]
     except Exception as e:
         err_msg = f"Error while generating: {e}"

@@ -125,13 +125,33 @@ def chat(user_input):
         return err_msg
 
 with gr.Blocks() as demo:
-    gr.Markdown("#
+    gr.Markdown("# 🏥 Qwen3 Medical GGUF Demo")
+
+    # Add warning and explanatory notes
     with gr.Row():
         with gr.Column():
-
-
+            gr.HTML("""
+            <div style="background-color: #fff3cd; border: 1px solid #ffeaa7; border-radius: 5px; padding: 15px; margin-bottom: 20px;">
+                <h3 style="color: #856404; margin-top: 0;">⚠️ Performance Notice</h3>
+                <p style="color: #856404; margin-bottom: 10px;">
+                    This demo runs the <strong>lastmass/Qwen3_Medical_GRPO</strong> model (Q4_K_M quantized version)
+                    on Hugging Face's free CPU hardware. Inference is <strong>very slow</strong>.
+                </p>
+                <p style="color: #856404; margin-bottom: 0;">
+                    For better performance, we recommend running inference <strong>locally</strong> with GPU acceleration.
+                    Please refer to the <a href="https://huggingface.co/lastmass/Qwen3_Medical_GRPO" target="_blank">model repository</a>
+                    for usage instructions. For optimal performance, use <strong>vLLM</strong> for inference.
+                </p>
+            </div>
+            """)
+
+    with gr.Row():
+        with gr.Column():
+            input_box = gr.Textbox(label="Input your question", placeholder="Please enter your medical question...")
+            submit_btn = gr.Button("Generate Response")
         with gr.Column():
-            output_box = gr.Textbox(label="
+            output_box = gr.Textbox(label="Model Response", lines=10)
+
     submit_btn.click(fn=chat, inputs=input_box, outputs=output_box)
 
-demo.launch(server_name="0.0.0.0", server_port=7860)
+demo.launch(server_name="0.0.0.0", server_port=7860)
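
The performance notice added in this commit recommends running inference locally instead of on the free CPU Space. Below is a minimal sketch of what that could look like with llama-cpp-python, matching the `llm(prompt, max_tokens=..., temperature=...)` call style used in `chat()`. The repo id and filename mirror the Space's defaults; `n_ctx`, `n_gpu_layers`, and the example prompt are illustrative assumptions, and the real app also prepends a `system_prompt` that this diff does not show.

from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Fetch the quantized weights; repo id and filename mirror the Space's defaults.
model_path = hf_hub_download(
    repo_id="lastmass/Qwen3_Medical_GRPO",
    filename="Qwen3_Medical_GRPO.i1-Q4_K_M.gguf",
)

# n_gpu_layers=-1 offloads all layers to the GPU when one is available;
# n_ctx=4096 is an illustrative context size, not a value taken from the Space.
llm = Llama(model_path=model_path, n_ctx=4096, n_gpu_layers=-1)

# The Space also prepends a system_prompt that is not visible in this diff.
prompt = "User input: What are common causes of chest pain? <start_working_out>"
response = llm(prompt, max_tokens=1024, temperature=0.7)
print(response["choices"][0]["text"])

The notice also names vLLM as the fastest option; that route would typically serve the repository's full-precision weights rather than this Q4_K_M GGUF file.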