lastmass committed
Commit e915b53 · verified · 1 Parent(s): f89f2b3

Update app.py

Files changed (1):
  1. app.py +78 -10
app.py CHANGED
@@ -1,23 +1,91 @@
-import gradio as gr
-from llama_cpp import Llama
 import os
+import sys
+import time
+from huggingface_hub import snapshot_download
+
+# --- Configuration (overridable via environment variables) ---
+MODEL_REPO = os.getenv("MODEL_REPO", "mradermacher/Qwen3_Medical_GRPO-i1-GGUF")
+MODEL_FILE = os.getenv("MODEL_FILE", "Qwen3_Medical_GRPO.i1-Q4_K_M.gguf")
+MODEL_DIR = os.getenv("MODEL_DIR", "/models")
+MODEL_PATH = os.path.join(MODEL_DIR, MODEL_FILE)
+HF_TOKEN = os.getenv("HF_TOKEN", None)  # set this in the Space secrets if the model repo is private
+# Optional thread setting (defaults to 8 if unset)
+N_THREADS = int(os.getenv("N_THREADS", "8"))
+
+# --- Ensure the model file exists; if not, download it from the Hugging Face Hub ---
+os.makedirs(MODEL_DIR, exist_ok=True)
+
+def download_model_if_missing():
+    if os.path.exists(MODEL_PATH):
+        print(f"Model already exists at {MODEL_PATH}")
+        return
+
+    print(f"Model not found at {MODEL_PATH}. Attempting to download from {MODEL_REPO} ...")
+    try:
+        # snapshot_download pulls the repo contents into MODEL_DIR;
+        # allow_patterns limits the download to the single file we need
+        snapshot_download(
+            repo_id=MODEL_REPO,
+            repo_type="model",
+            local_dir=MODEL_DIR,
+            token=HF_TOKEN,
+            allow_patterns=[MODEL_FILE],
+        )
+    except Exception as e:
+        print("Error while trying to download the model:", e, file=sys.stderr)
+        print("If the model is private, make sure HF_TOKEN is set in Space Secrets and has read access.", file=sys.stderr)
+        raise
+
+    # Wait briefly for the filesystem to settle (optional)
+    time.sleep(1)
 
-model_path = os.getenv("MODEL_PATH", "/models/Qwen3_Medical_GRPO-i1-Q4_K_M.gguf")
+    if not os.path.exists(MODEL_PATH):
+        # snapshot_download sometimes places files in a subdirectory; search under MODEL_DIR
+        found = None
+        for root, dirs, files in os.walk(MODEL_DIR):
+            if MODEL_FILE in files:
+                found = os.path.join(root, MODEL_FILE)
+                break
+        if found:
+            print(f"Found model at {found}; moving to {MODEL_PATH}")
+            os.replace(found, MODEL_PATH)
+        else:
+            raise RuntimeError(f"Model download finished but {MODEL_PATH} still not found. Check repo contents.")
 
-# Initialize the model
-llm = Llama(model_path=model_path, n_ctx=4096, n_threads=8)
+download_model_if_missing()
+
+# --- Import and initialize llama_cpp only now that the model is in place ---
+try:
+    from llama_cpp import Llama
+except Exception:
+    print("Failed to import llama_cpp. Ensure the wheel you installed matches the runtime (musl vs glibc) and required libs are present.", file=sys.stderr)
+    raise
+
+if not os.path.exists(MODEL_PATH):
+    raise RuntimeError(f"Model path does not exist after download: {MODEL_PATH}")
+
+# Initialize the model (clamp N_THREADS to at least 1)
+n_threads = max(1, N_THREADS)
+llm = Llama(model_path=MODEL_PATH, n_ctx=4096, n_threads=n_threads)
+
+# --- System prompt and Gradio interface ---
+import gradio as gr
 
-# Define the system prompt
 system_prompt = """You are given a problem.
 Think about the problem and provide your working out.
 Place it between <start_working_out> and <end_working_out>.
 Then, provide your solution between <SOLUTION></SOLUTION>"""
 
 def chat(user_input):
-    # Append <start_working_out> to the end of the user input
-    prompt = system_prompt + "\n\nUser input: " + user_input + " <start_working_out>"
-    response = llm(prompt, max_tokens=2048, temperature=0.7)
-    return response["choices"][0]["text"]
+    try:
+        prompt = system_prompt + "\n\nUser input: " + user_input + " <start_working_out>"
+        response = llm(prompt, max_tokens=2048, temperature=0.7)
+        return response["choices"][0]["text"]
+    except Exception as e:
+        # Catch runtime errors and return a friendly message (also printed to the container logs)
+        err_msg = f"Error while generating: {e}"
+        print(err_msg, file=sys.stderr)
+        return err_msg
 
 with gr.Blocks() as demo:
     gr.Markdown("# 🦙 GGUF Model Demo")
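
For context on the fallback search above: snapshot_download mirrors the repo's directory layout under local_dir and returns the snapshot folder rather than a file path, which is why app.py walks MODEL_DIR afterwards. A single-file fetch with hf_hub_download returns the resolved path of the file itself and would make that fallback unnecessary. A minimal sketch, assuming the same MODEL_REPO / MODEL_FILE / MODEL_DIR / HF_TOKEN environment variables used in app.py:

import os
from huggingface_hub import hf_hub_download

# hf_hub_download fetches exactly one file and returns its local path,
# so no post-download search of MODEL_DIR is needed.
model_path = hf_hub_download(
    repo_id=os.getenv("MODEL_REPO", "mradermacher/Qwen3_Medical_GRPO-i1-GGUF"),
    filename=os.getenv("MODEL_FILE", "Qwen3_Medical_GRPO.i1-Q4_K_M.gguf"),
    token=os.getenv("HF_TOKEN", None),
    local_dir=os.getenv("MODEL_DIR", "/models"),
)
print(f"Model available at {model_path}")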