Update app.py
app.py
CHANGED
@@ -1,23 +1,90 @@
-import gradio as gr
-from llama_cpp import Llama
 import os
+import sys
+import time
+from huggingface_hub import snapshot_download
 
-
+# --- Configuration (overridable via environment variables) ---
+MODEL_REPO = os.getenv("MODEL_REPO", "mradermacher/Qwen3_Medical_GRPO-i1-GGUF")
+MODEL_FILE = os.getenv("MODEL_FILE", "Qwen3_Medical_GRPO.i1-Q4_K_M.gguf")
+MODEL_DIR = os.getenv("MODEL_DIR", "/models")
+MODEL_PATH = os.path.join(MODEL_DIR, MODEL_FILE)
+HF_TOKEN = os.getenv("HF_TOKEN", None)  # set this in the Space secrets if the model repo is private
+# Optional thread count (defaults to 8 if unset)
+N_THREADS = int(os.getenv("N_THREADS", "8"))
 
-
-
+# --- Ensure the model file exists; if missing, download it from the Hugging Face Hub ---
+os.makedirs(MODEL_DIR, exist_ok=True)
+
+def download_model_if_missing():
+    if os.path.exists(MODEL_PATH):
+        print(f"Model already exists at {MODEL_PATH}")
+        return
+
+    print(f"Model not found at {MODEL_PATH}. Attempting to download from {MODEL_REPO} ...")
+    try:
+        # snapshot_download pulls repo content into MODEL_DIR; allow_patterns limits the download to the one file we need
+        snapshot_download(
+            repo_id=MODEL_REPO,
+            repo_type="model",
+            local_dir=MODEL_DIR,
+            token=HF_TOKEN,
+            allow_patterns=[MODEL_FILE],
+        )
+    except Exception as e:
+        print("Error while trying to download the model:", e, file=sys.stderr)
+        print("If the model is private, make sure HF_TOKEN is set in the Space secrets and has read access.", file=sys.stderr)
+        raise
+
+    # Give the filesystem a moment to settle (optional)
+    time.sleep(1)
+
+    if not os.path.exists(MODEL_PATH):
+        # snapshot_download sometimes places the file in a subdirectory; search MODEL_DIR for it
+        found = None
+        for root, dirs, files in os.walk(MODEL_DIR):
+            if MODEL_FILE in files:
+                found = os.path.join(root, MODEL_FILE)
+                break
+        if found:
+            print(f"Found model at {found}; moving to {MODEL_PATH}")
+            os.replace(found, MODEL_PATH)
+        else:
+            raise RuntimeError(f"Model download finished but {MODEL_PATH} still not found. Check repo contents.")
+
+download_model_if_missing()
+
+# --- Import and initialize llama_cpp only after the model is in place ---
+try:
+    from llama_cpp import Llama
+except Exception:
+    print("Failed to import llama_cpp. Ensure the wheel you installed matches the runtime (musl vs glibc) and required libs are present.", file=sys.stderr)
+    raise
+
+if not os.path.exists(MODEL_PATH):
+    raise RuntimeError(f"Model path does not exist after download: {MODEL_PATH}")
+
+# Initialize the model (clamp the thread count to at least 1)
+n_threads = max(1, N_THREADS)
+llm = Llama(model_path=MODEL_PATH, n_ctx=4096, n_threads=n_threads)
+
+# --- System prompt and Gradio interface ---
+import gradio as gr
 
-# Define the system prompt
 system_prompt = """You are given a problem.
 Think about the problem and provide your working out.
 Place it between <start_working_out> and <end_working_out>.
 Then, provide your solution between <SOLUTION></SOLUTION>"""
 
 def chat(user_input):
-
-
-
-
+    try:
+        prompt = system_prompt + "\n\nUser input: " + user_input + " <start_working_out>"
+        response = llm(prompt, max_tokens=2048, temperature=0.7)
+        return response["choices"][0]["text"]
+    except Exception as e:
+        # Catch runtime errors and return a friendly message (also printed to the container logs)
+        err_msg = f"Error while generating: {e}"
+        print(err_msg, file=sys.stderr)
+        return err_msg
 
 with gr.Blocks() as demo:
     gr.Markdown("# 🦙 GGUF Model Demo")
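
A note on the download step: for a single file, huggingface_hub's hf_hub_download returns the resolved local path directly, which would make the os.walk fallback above unnecessary. A minimal sketch of that alternative, assuming the same MODEL_* configuration values defined in the diff:

import os
from huggingface_hub import hf_hub_download

def download_model_if_missing_alt():
    # hf_hub_download fetches exactly one file and returns its local path,
    # so there is no need to search MODEL_DIR afterwards.
    if os.path.exists(MODEL_PATH):
        return MODEL_PATH
    return hf_hub_download(
        repo_id=MODEL_REPO,
        filename=MODEL_FILE,
        local_dir=MODEL_DIR,
        token=HF_TOKEN,
    )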
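
The chat handler returns the raw completion, which under this system prompt contains the working-out followed by a <SOLUTION></SOLUTION> block. If the UI should show only the final answer, a small post-processing helper could strip the reasoning; this is a hypothetical addition, not part of the commit:

import re

def extract_solution(text):
    # Return the text between <SOLUTION> and </SOLUTION>;
    # fall back to the full completion if the tags are missing.
    match = re.search(r"<SOLUTION>(.*?)</SOLUTION>", text, re.DOTALL)
    return match.group(1).strip() if match else text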
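
The hunk ends just inside the gr.Blocks() context, so the rest of the interface is not shown. Presumably the remainder of app.py wires chat to input and output components and launches the demo, along these lines (component names here are hypothetical, not from the commit):

with gr.Blocks() as demo:
    gr.Markdown("# 🦙 GGUF Model Demo")
    user_box = gr.Textbox(label="Your question")     # hypothetical name
    output_box = gr.Textbox(label="Model response")  # hypothetical name
    send_btn = gr.Button("Generate")                 # hypothetical name
    send_btn.click(fn=chat, inputs=user_box, outputs=output_box)

demo.launch()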