Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -5,7 +5,7 @@ import urllib.request
|
|
5 |
from huggingface_hub import snapshot_download
|
6 |
|
7 |
# --- 配置(可通过环境变量覆盖) ---
|
8 |
-
MODEL_REPO = os.getenv("MODEL_REPO", "
|
9 |
MODEL_FILE = os.getenv("MODEL_FILE", "Qwen3_Medical_GRPO.i1-Q4_K_M.gguf")
|
10 |
MODEL_DIR = os.getenv("MODEL_DIR", "/models")
|
11 |
MODEL_PATH = os.path.join(MODEL_DIR, MODEL_FILE)
|
@@ -117,7 +117,7 @@ Then, provide your solution between <SOLUTION></SOLUTION>"""
|
|
117 |
def chat(user_input):
|
118 |
try:
|
119 |
prompt = system_prompt + "\n\nUser input: " + user_input + " <start_working_out>"
|
120 |
-
response = llm(prompt, max_tokens=
|
121 |
return response["choices"][0]["text"]
|
122 |
except Exception as e:
|
123 |
err_msg = f"Error while generating: {e}"
|
@@ -125,13 +125,33 @@ def chat(user_input):
|
|
125 |
return err_msg
|
126 |
|
127 |
with gr.Blocks() as demo:
|
128 |
-
gr.Markdown("#
|
|
|
|
|
129 |
with gr.Row():
|
130 |
with gr.Column():
|
131 |
-
|
132 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
133 |
with gr.Column():
|
134 |
-
output_box = gr.Textbox(label="
|
|
|
135 |
submit_btn.click(fn=chat, inputs=input_box, outputs=output_box)
|
136 |
|
137 |
-
demo.launch(server_name="0.0.0.0", server_port=7860)
|
|
|
5 |
from huggingface_hub import snapshot_download
|
6 |
|
7 |
# --- 配置(可通过环境变量覆盖) ---
|
8 |
+
MODEL_REPO = os.getenv("MODEL_REPO", "lastmass/Qwen3_Medical_GRPO")
|
9 |
MODEL_FILE = os.getenv("MODEL_FILE", "Qwen3_Medical_GRPO.i1-Q4_K_M.gguf")
|
10 |
MODEL_DIR = os.getenv("MODEL_DIR", "/models")
|
11 |
MODEL_PATH = os.path.join(MODEL_DIR, MODEL_FILE)
|
|
|
117 |
def chat(user_input):
|
118 |
try:
|
119 |
prompt = system_prompt + "\n\nUser input: " + user_input + " <start_working_out>"
|
120 |
+
response = llm(prompt, max_tokens=1024, temperature=0.7) # 改为1024
|
121 |
return response["choices"][0]["text"]
|
122 |
except Exception as e:
|
123 |
err_msg = f"Error while generating: {e}"
|
|
|
125 |
return err_msg
|
126 |
|
127 |
with gr.Blocks() as demo:
|
128 |
+
gr.Markdown("# 🏥 Qwen3 Medical GGUF Demo")
|
129 |
+
|
130 |
+
# 添加警告和说明信息
|
131 |
with gr.Row():
|
132 |
with gr.Column():
|
133 |
+
gr.HTML("""
|
134 |
+
<div style="background-color: #fff3cd; border: 1px solid #ffeaa7; border-radius: 5px; padding: 15px; margin-bottom: 20px;">
|
135 |
+
<h3 style="color: #856404; margin-top: 0;">⚠️ Performance Notice</h3>
|
136 |
+
<p style="color: #856404; margin-bottom: 10px;">
|
137 |
+
This demo runs the <strong>lastmass/Qwen3_Medical_GRPO</strong> model (Q4_K_M quantized version)
|
138 |
+
on Hugging Face's free CPU hardware. Inference is <strong>very slow</strong>.
|
139 |
+
</p>
|
140 |
+
<p style="color: #856404; margin-bottom: 0;">
|
141 |
+
For better performance, we recommend running inference <strong>locally</strong> with GPU acceleration.
|
142 |
+
Please refer to the <a href="https://huggingface.co/lastmass/Qwen3_Medical_GRPO" target="_blank">model repository</a>
|
143 |
+
for usage instructions. For optimal performance, use <strong>vLLM</strong> for inference.
|
144 |
+
</p>
|
145 |
+
</div>
|
146 |
+
""")
|
147 |
+
|
148 |
+
with gr.Row():
|
149 |
+
with gr.Column():
|
150 |
+
input_box = gr.Textbox(label="Input your question", placeholder="Please enter your medical question...")
|
151 |
+
submit_btn = gr.Button("Generate Response")
|
152 |
with gr.Column():
|
153 |
+
output_box = gr.Textbox(label="Model Response", lines=10)
|
154 |
+
|
155 |
submit_btn.click(fn=chat, inputs=input_box, outputs=output_box)
|
156 |
|
157 |
+
demo.launch(server_name="0.0.0.0", server_port=7860)
|