Commit e82b7da
Parent(s): 4aeaa68

Serve Foundation-Sec-8B-Q4_K_M directly from upstream repo

Files changed:
- Dockerfile +7 -0
- app.py +59 -0
- requirements.txt +3 -0
Dockerfile
ADDED
@@ -0,0 +1,7 @@
+FROM python:3.11-slim
+WORKDIR /app
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+COPY . .
+EXPOSE 7860
+CMD ["python", "-u", "app.py"]
app.py
ADDED
@@ -0,0 +1,59 @@
+import os, logging, requests, time
+from contextlib import asynccontextmanager
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+from llama_cpp import Llama
+
+# Direct public download link
+MODEL_URL = (
+    "https://huggingface.co/fdtn-ai/Foundation-Sec-8B-Q4_K_M-GGUF/"
+    "resolve/main/foundation-sec-8b-q4_k_m.gguf"
+)
+MODEL_PATH = "foundation-sec-8b-q4_k_m.gguf"
+
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    logging.basicConfig(level=logging.INFO)
+
+    # Download once; skip if already present
+    if not os.path.exists(MODEL_PATH):
+        logging.info("Downloading model … (~4.9 GB)")
+        with requests.get(MODEL_URL, stream=True, timeout=30) as r:
+            r.raise_for_status()
+            with open(MODEL_PATH, "wb") as f:
+                for chunk in r.iter_content(chunk_size=8192):
+                    f.write(chunk)
+        logging.info("Download finished.")
+
+    logging.info("Loading model …")
+    app.state.llm = Llama(
+        model_path=MODEL_PATH,
+        n_ctx=4096,
+        n_threads=os.cpu_count(),
+        verbose=False
+    )
+    logging.info("Model ready.")
+    yield
+    logging.info("Shutting down.")
+
+app = FastAPI(lifespan=lifespan)
+
+class ChatRequest(BaseModel):
+    messages: list[dict]
+    max_tokens: int = 256
+    temperature: float = 0.7
+
+@app.get("/")
+def root():
+    return {"message": "Foundation-Sec-8B API running on HF Space"}
+
+@app.post("/v1/chat/completions")
+def chat(req: ChatRequest):
+    try:
+        return app.state.llm.create_chat_completion(
+            messages=req.messages,
+            max_tokens=req.max_tokens,
+            temperature=req.temperature
+        )
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
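
Note: as committed, app.py only defines the FastAPI app; it never starts a server, yet the Dockerfile's CMD runs `python -u app.py`, so nothing would actually listen on the exposed port 7860. A minimal sketch of one way to make that CMD work, assuming uvicorn (already pinned in requirements.txt) should serve the app; this block is not part of the commit:

    # Hypothetical addition (not in this commit): start the server when run directly.
    if __name__ == "__main__":
        import uvicorn
        uvicorn.run(app, host="0.0.0.0", port=7860)

An equivalent alternative would be leaving app.py unchanged and pointing the Dockerfile CMD at uvicorn instead, e.g. `uvicorn app:app --host 0.0.0.0 --port 7860`.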
requirements.txt
ADDED
@@ -0,0 +1,3 @@
+fastapi==0.110
+uvicorn[standard]==0.29
+llama-cpp-python==0.2.77
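
For reference, a minimal client sketch against the /v1/chat/completions route defined above. The base URL is a placeholder assumption (the actual Space URL is not part of this commit), and the response shape is the OpenAI-style dict returned by llama-cpp-python's create_chat_completion:

    import requests

    # Placeholder base URL; substitute the deployed Space URL.
    BASE_URL = "http://localhost:7860"

    resp = requests.post(
        f"{BASE_URL}/v1/chat/completions",
        json={
            "messages": [{"role": "user", "content": "Summarize the Log4Shell vulnerability."}],
            "max_tokens": 128,
            "temperature": 0.2,
        },
        timeout=300,
    )
    resp.raise_for_status()
    print(resp.json()["choices"][0]["message"]["content"])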