hackeracademy committed
Commit 5d8cca4 · 1 Parent(s): 222587e

Use Gradio wrapper to satisfy HF Spaces port requirement

Files changed (3)
  1. Dockerfile +1 -4
  2. app.py +17 -50
  3. requirements.txt +1 -1
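
For context on the commit message: a Docker Space is only reachable once something inside the container listens on the expected port (7860 by default), and letting Gradio own that socket is the simplest way to satisfy the check. A minimal sketch of the pattern, assuming Gradio 4.x; the echo handler is a stand-in, not this app's real model call:

    import gradio as gr

    def echo(message, history):
        # stand-in handler; the real app.py below wires this to llama_cpp
        return message

    demo = gr.ChatInterface(echo)
    # binding 0.0.0.0:7860 is what the HF Spaces port check looks for
    demo.launch(server_name="0.0.0.0", server_port=7860)
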
Dockerfile CHANGED
@@ -1,11 +1,8 @@
 FROM python:3.11-alpine
-
-RUN apk add --no-cache build-base libffi-dev cmake git curl
+RUN apk add --no-cache build-base libffi-dev cmake git
 WORKDIR /app
-
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
-
 COPY . .
 EXPOSE 7860
 CMD ["python", "-u", "app.py"]
app.py CHANGED
@@ -1,6 +1,4 @@
-import os, logging, requests, threading, uvicorn
-from fastapi import FastAPI, HTTPException
-from pydantic import BaseModel
+import os, gradio as gr
 from llama_cpp import Llama
 
 MODEL_URL = (
@@ -9,54 +7,23 @@ MODEL_URL = (
 )
 MODEL_PATH = "foundation-sec-8b-q4_k_m.gguf"
 
-logging.basicConfig(level=logging.INFO)
+# download once (Gradio blocks until the file exists)
+if not os.path.exists(MODEL_PATH):
+    import requests, time
+    with requests.get(MODEL_URL, stream=True) as r:
+        r.raise_for_status()
+        with open(MODEL_PATH, "wb") as f:
+            for chunk in r.iter_content(chunk_size=8192):
+                f.write(chunk)
 
-# --- tiny “alive” route so HF sees the container immediately ---
-app = FastAPI()
+llm = Llama(model_path=MODEL_PATH, n_ctx=4096, verbose=False)
 
-@app.get("/")
-def root():
-    return {"status": "loading model …"}
+def chat_fn(message, history):
+    messages = [{"role": "user", "content": message}]
+    out = llm.create_chat_completion(messages=messages, max_tokens=256, temperature=0.7)
+    return out["choices"][0]["message"]["content"]
 
-# --- download once, in a background thread so / stays alive ---
-def download_model():
-    if not os.path.exists(MODEL_PATH):
-        logging.info("Downloading model …")
-        with requests.get(MODEL_URL, stream=True, timeout=30) as r:
-            r.raise_for_status()
-            with open(MODEL_PATH, "wb") as f:
-                for chunk in r.iter_content(chunk_size=8192):
-                    f.write(chunk)
-        logging.info("Download finished.")
-    logging.info("Loading model into RAM …")
-    app.state.llm = Llama(
-        model_path=MODEL_PATH,
-        n_ctx=4096,
-        n_threads=os.cpu_count(),
-        verbose=False
-    )
-    logging.info("Model ready.")
+demo = gr.ChatInterface(chat_fn, title="Foundation-Sec-8B")
 
-threading.Thread(target=download_model, daemon=True).start()
-
-class ChatRequest(BaseModel):
-    messages: list[dict]
-    max_tokens: int = 256
-    temperature: float = 0.7
-
-@app.post("/v1/chat/completions")
-def chat(req: ChatRequest):
-    if not hasattr(app.state, "llm"):
-        raise HTTPException(
-            status_code=503,
-            detail="Model still loading, please retry in ~30 s"
-        )
-    return app.state.llm.create_chat_completion(
-        messages=req.messages,
-        max_tokens=req.max_tokens,
-        temperature=req.temperature
-    )
-
-# --- start uvicorn on port 7860 (HF expects this) ---
-if __name__ == "__main__":
-    uvicorn.run(app, host="0.0.0.0", port=7860)
+# expose on 0.0.0.0:7860 (Gradio default)
+demo.launch(server_name="0.0.0.0", server_port=7860)
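
Once the new app.py is up, the ChatInterface can also be exercised programmatically. A sketch using gradio_client, assuming the container is reachable on localhost:7860 (e.g. via docker run -p 7860:7860) and that gr.ChatInterface exposes its handler under the /chat API name, as it does in Gradio 4.x:

    from gradio_client import Client

    # connect to the locally mapped container; a Space id would also work here
    client = Client("http://localhost:7860")
    reply = client.predict("What is CVE-2021-44228?", api_name="/chat")
    print(reply)
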
requirements.txt CHANGED
@@ -1,5 +1,5 @@
 fastapi==0.110
-uvicorn[standard]==0.29
 --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu
 llama-cpp-python==0.2.90
 requests # <-- add this line
+gradio==4.43.0 # or latest
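
A quick way to confirm the pinned CPU wheel provides the API the new app.py relies on is a local smoke test; a sketch that assumes the GGUF file has already been downloaded into the working directory:

    from llama_cpp import Llama

    # same constructor and chat-completion call that app.py uses
    llm = Llama(model_path="foundation-sec-8b-q4_k_m.gguf", n_ctx=4096, verbose=False)
    out = llm.create_chat_completion(
        messages=[{"role": "user", "content": "Say hello."}],
        max_tokens=16,
    )
    print(out["choices"][0]["message"]["content"])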