lastmass committed on
Commit
798a275
·
verified ·
1 Parent(s): 41a11e9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -7
app.py CHANGED
@@ -5,7 +5,7 @@ import urllib.request
5
  from huggingface_hub import snapshot_download
6
 
7
  # --- 配置(可通过环境变量覆盖) ---
8
- MODEL_REPO = os.getenv("MODEL_REPO", "mradermacher/Qwen3_Medical_GRPO-i1-GGUF")
9
  MODEL_FILE = os.getenv("MODEL_FILE", "Qwen3_Medical_GRPO.i1-Q4_K_M.gguf")
10
  MODEL_DIR = os.getenv("MODEL_DIR", "/models")
11
  MODEL_PATH = os.path.join(MODEL_DIR, MODEL_FILE)
@@ -117,7 +117,7 @@ Then, provide your solution between <SOLUTION></SOLUTION>"""
117
  def chat(user_input):
118
  try:
119
  prompt = system_prompt + "\n\nUser input: " + user_input + " <start_working_out>"
120
- response = llm(prompt, max_tokens=2048, temperature=0.7)
121
  return response["choices"][0]["text"]
122
  except Exception as e:
123
  err_msg = f"Error while generating: {e}"
@@ -125,13 +125,33 @@ def chat(user_input):
125
  return err_msg
126
 
127
  with gr.Blocks() as demo:
128
- gr.Markdown("# 🦙 GGUF Model Demo")
 
 
129
  with gr.Row():
130
  with gr.Column():
131
- input_box = gr.Textbox(label="输入你的问题", placeholder="请输入问题...")
132
- submit_btn = gr.Button("生成回答")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
  with gr.Column():
134
- output_box = gr.Textbox(label="模型回答", lines=10)
 
135
  submit_btn.click(fn=chat, inputs=input_box, outputs=output_box)
136
 
137
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
5
  from huggingface_hub import snapshot_download
6
 
7
  # --- 配置(可通过环境变量覆盖) ---
8
+ MODEL_REPO = os.getenv("MODEL_REPO", "lastmass/Qwen3_Medical_GRPO")
9
  MODEL_FILE = os.getenv("MODEL_FILE", "Qwen3_Medical_GRPO.i1-Q4_K_M.gguf")
10
  MODEL_DIR = os.getenv("MODEL_DIR", "/models")
11
  MODEL_PATH = os.path.join(MODEL_DIR, MODEL_FILE)
 
117
  def chat(user_input):
118
  try:
119
  prompt = system_prompt + "\n\nUser input: " + user_input + " <start_working_out>"
120
+ response = llm(prompt, max_tokens=1024, temperature=0.7) # 改为1024
121
  return response["choices"][0]["text"]
122
  except Exception as e:
123
  err_msg = f"Error while generating: {e}"
 
125
  return err_msg
126
 
127
  with gr.Blocks() as demo:
128
+ gr.Markdown("# 🏥 Qwen3 Medical GGUF Demo")
129
+
130
+ # 添加警告和说明信息
131
  with gr.Row():
132
  with gr.Column():
133
+ gr.HTML("""
134
+ <div style="background-color: #fff3cd; border: 1px solid #ffeaa7; border-radius: 5px; padding: 15px; margin-bottom: 20px;">
135
+ <h3 style="color: #856404; margin-top: 0;">⚠️ Performance Notice</h3>
136
+ <p style="color: #856404; margin-bottom: 10px;">
137
+ This demo runs the <strong>lastmass/Qwen3_Medical_GRPO</strong> model (Q4_K_M quantized version)
138
+ on Hugging Face's free CPU hardware. Inference is <strong>very slow</strong>.
139
+ </p>
140
+ <p style="color: #856404; margin-bottom: 0;">
141
+ For better performance, we recommend running inference <strong>locally</strong> with GPU acceleration.
142
+ Please refer to the <a href="https://huggingface.co/lastmass/Qwen3_Medical_GRPO" target="_blank">model repository</a>
143
+ for usage instructions. For optimal performance, use <strong>vLLM</strong> for inference.
144
+ </p>
145
+ </div>
146
+ """)
147
+
148
+ with gr.Row():
149
+ with gr.Column():
150
+ input_box = gr.Textbox(label="Input your question", placeholder="Please enter your medical question...")
151
+ submit_btn = gr.Button("Generate Response")
152
  with gr.Column():
153
+ output_box = gr.Textbox(label="Model Response", lines=10)
154
+
155
  submit_btn.click(fn=chat, inputs=input_box, outputs=output_box)
156
 
157
+ demo.launch(server_name="0.0.0.0", server_port=7860)