hackeracademy committed on
Commit e82b7da · 1 Parent(s): 4aeaa68

Serve Foundation-Sec-8B-Q4_K_M directly from upstream repo

Files changed (3)
  1. Dockerfile +9 -0
  2. app.py +64 -0
  3. requirements.txt +3 -0
Dockerfile ADDED
@@ -0,0 +1,9 @@
+ FROM python:3.11-slim
+ WORKDIR /app
+ # Build tools so pip can compile llama-cpp-python from source on the slim image
+ RUN apt-get update && apt-get install -y --no-install-recommends build-essential cmake && rm -rf /var/lib/apt/lists/*
+ COPY requirements.txt .
+ RUN pip install --no-cache-dir -r requirements.txt
+ COPY . .
+ EXPOSE 7860
+ CMD ["python", "-u", "app.py"]
app.py ADDED
@@ -0,0 +1,64 @@
+ import os, logging, requests
+ from contextlib import asynccontextmanager
+ from fastapi import FastAPI, HTTPException
+ from pydantic import BaseModel
+ from llama_cpp import Llama
+
+ # Direct public download link
+ MODEL_URL = (
+     "https://huggingface.co/fdtn-ai/Foundation-Sec-8B-Q4_K_M-GGUF/"
+     "resolve/main/foundation-sec-8b-q4_k_m.gguf"
+ )
+ MODEL_PATH = "foundation-sec-8b-q4_k_m.gguf"
+
+ @asynccontextmanager
+ async def lifespan(app: FastAPI):
+     logging.basicConfig(level=logging.INFO)
+
+     # Download once; skip if already present
+     if not os.path.exists(MODEL_PATH):
+         logging.info("Downloading model … (~4.9 GB)")
+         with requests.get(MODEL_URL, stream=True, timeout=30) as r:
+             r.raise_for_status()
+             with open(MODEL_PATH, "wb") as f:
+                 for chunk in r.iter_content(chunk_size=8192):
+                     f.write(chunk)
+         logging.info("Download finished.")
+
+     logging.info("Loading model …")
+     app.state.llm = Llama(
+         model_path=MODEL_PATH,
+         n_ctx=4096,
+         n_threads=os.cpu_count(),
+         verbose=False
+     )
+     logging.info("Model ready.")
+     yield
+     logging.info("Shutting down.")
+
+ app = FastAPI(lifespan=lifespan)
+
+ class ChatRequest(BaseModel):
+     messages: list[dict]
+     max_tokens: int = 256
+     temperature: float = 0.7
+
+ @app.get("/")
+ def root():
+     return {"message": "Foundation-Sec-8B API running on HF Space"}
+
+ @app.post("/v1/chat/completions")
+ def chat(req: ChatRequest):
+     try:
+         return app.state.llm.create_chat_completion(
+             messages=req.messages,
+             max_tokens=req.max_tokens,
+             temperature=req.temperature
+         )
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=str(e))
+
+ if __name__ == "__main__":
+     # The Dockerfile runs "python -u app.py", so start the server here on the exposed port
+     import uvicorn
+     uvicorn.run(app, host="0.0.0.0", port=7860)
requirements.txt ADDED
@@ -0,0 +1,3 @@
+ fastapi==0.110
+ uvicorn[standard]==0.29
+ llama-cpp-python==0.2.77
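
For reference, a minimal client sketch against the /v1/chat/completions endpoint that app.py exposes. The base URL is an assumption (a deployed Space has its own hostname; locally the container listens on port 7860), the payload simply mirrors the ChatRequest model above, and the response follows llama-cpp-python's OpenAI-style chat-completion shape.

import requests

# Assumed base URL: swap in the actual Space hostname, or keep localhost when running the container locally
BASE_URL = "http://localhost:7860"

payload = {
    "messages": [
        {"role": "system", "content": "You are a cybersecurity assistant."},
        {"role": "user", "content": "Summarize CVE-2021-44228 in one sentence."},
    ],
    "max_tokens": 128,      # mirrors ChatRequest.max_tokens (default 256)
    "temperature": 0.2,     # mirrors ChatRequest.temperature (default 0.7)
}

# Generation on CPU can be slow, so allow a generous timeout
resp = requests.post(f"{BASE_URL}/v1/chat/completions", json=payload, timeout=300)
resp.raise_for_status()

# llama-cpp-python returns an OpenAI-style completion dict
print(resp.json()["choices"][0]["message"]["content"])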