# NOTE: "Spaces: / Sleeping / Sleeping" was a Hugging Face Spaces status
# banner captured when this file was scraped — it is not part of the program.
import os

import gradio as gr
from llama_cpp import Llama

# Model path is overridable via the MODEL_PATH environment variable;
# defaults to the GGUF file baked into the container image.
model_path = os.getenv("MODEL_PATH", "/models/Qwen3_Medical_GRPO-i1-Q4_K_M.gguf")

# Initialize the llama.cpp model (4096-token context window, 8 CPU threads).
llm = Llama(model_path=model_path, n_ctx=4096, n_threads=8)
# 定义 system prompt | |
# System prompt: instructs the model to emit its reasoning between explicit
# markers before giving the final answer between <SOLUTION> tags.
# (The scraped original had " | |" residue inside this string literal,
# which would have been sent verbatim to the model — removed here.)
system_prompt = """You are given a problem.
Think about the problem and provide your working out.
Place it between <start_working_out> and <end_working_out>.
Then, provide your solution between <SOLUTION></SOLUTION>"""
def chat(user_input: str) -> str:
    """Generate an answer for *user_input* with the loaded GGUF model.

    Appends the ``<start_working_out>`` marker to the prompt so the model
    begins its reasoning trace immediately, then returns the raw completion
    text (reasoning and solution markers included, untrimmed).
    """
    # Append <start_working_out> so generation starts inside the reasoning block.
    prompt = system_prompt + "\n\nUser input: " + user_input + " <start_working_out>"
    response = llm(prompt, max_tokens=2048, temperature=0.7)
    return response["choices"][0]["text"]
# Build the Gradio UI: question input and submit button on the left,
# model answer on the right, wired to chat().
with gr.Blocks() as demo:
    gr.Markdown("# 🦙 GGUF Model Demo")
    with gr.Row():
        with gr.Column():
            input_box = gr.Textbox(label="输入你的问题", placeholder="请输入问题...")
            submit_btn = gr.Button("生成回答")
        with gr.Column():
            output_box = gr.Textbox(label="模型回答", lines=10)
    submit_btn.click(fn=chat, inputs=input_box, outputs=output_box)

# Bind to all interfaces on port 7860 so the app is reachable from outside
# the container (standard Hugging Face Spaces / Docker setup).
demo.launch(server_name="0.0.0.0", server_port=7860)