Qwen3_Medical / app.py
lastmass's picture
Update app.py
1fc7ac0 verified
raw
history blame
1.21 kB
import gradio as gr
from llama_cpp import Llama
import os
# Path to the GGUF weights; the MODEL_PATH environment variable overrides
# the baked-in default location.
model_path = os.environ.get(
    "MODEL_PATH",
    "/models/Qwen3_Medical_GRPO-i1-Q4_K_M.gguf",
)

# Initialize the model once at import time so every request reuses it.
llm = Llama(
    model_path=model_path,
    n_ctx=4096,
    n_threads=8,
)
# Define the system prompt: it instructs the model to put its reasoning
# between the working-out tags and its final answer between the SOLUTION tags.
system_prompt = "\n".join(
    (
        "You are given a problem.",
        "Think about the problem and provide your working out.",
        "Place it between <start_working_out> and <end_working_out>.",
        "Then, provide your solution between <SOLUTION></SOLUTION>",
    )
)
def chat(user_input: str) -> str:
    """Generate the model's answer for a single user question.

    The prompt is built from the module-level ``system_prompt``, the user's
    text, and a trailing ``<start_working_out>`` tag.

    Args:
        user_input: The question text entered in the UI.

    Returns:
        The ``text`` field of the first completion choice, or an empty
        string when ``user_input`` is ``None``, empty, or whitespace-only.
    """
    # Nothing to answer: skip the generation call entirely for blank input.
    if not user_input or not user_input.strip():
        return ""

    # Append <start_working_out> after the user's input so the completion
    # begins inside the working-out section requested by the system prompt.
    prompt = f"{system_prompt}\n\nUser input: {user_input} <start_working_out>"
    response = llm(prompt, max_tokens=2048, temperature=0.7)
    return response["choices"][0]["text"]
# Build the UI: one row with two columns — the question box and submit
# button in the first, the model's answer in the second.
with gr.Blocks() as demo:
    gr.Markdown(value="# 🦙 GGUF Model Demo")

    with gr.Row():
        with gr.Column():
            input_box = gr.Textbox(
                label="输入你的问题",
                placeholder="请输入问题...",
            )
            submit_btn = gr.Button(value="生成回答")

        with gr.Column():
            output_box = gr.Textbox(
                label="模型回答",
                lines=10,
            )

    # Clicking the button sends the question text to chat() and shows the
    # returned string in the answer box.
    submit_btn.click(
        fn=chat,
        inputs=input_box,
        outputs=output_box,
    )

# Serve on all interfaces at port 7860.
demo.launch(
    server_name="0.0.0.0",
    server_port=7860,
)