Spaces:

omaryasserhassan
/

llm_server

Sleeping

omaryasserhassan committed on Aug 14

Commit

14e7061

verified ·

1 Parent(s): 54f48c0

Update Dockerfile

Files changed (1) hide show

Dockerfile CHANGED Viewed

@@ -1,8 +1,30 @@
 # Previous Dockerfile (old side of the diff): minimal image that serves the app with uvicorn.
 # Base image: the slim variant of the official Python 3.10 image.
 FROM python:3.10-slim
 # All following relative paths resolve against /app.
 WORKDIR /app
 # Copy only the dependency manifest before the rest of the source.
 COPY requirements.txt .
 # Install the Python dependencies listed in requirements.txt without keeping pip's cache
 # (this line is marked as removed by the commit).
-RUN pip install --no-cache-dir -r requirements.txt
 # Copy the remainder of the build context into /app.
 COPY . .
 # Run uvicorn serving app:app on all interfaces, port 7860.
 CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

 FROM python:3.10-slim
+# Install system dependencies (compiler toolchain, cmake, git) for building llama.cpp
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    cmake \
+    git \
+    && rm -rf /var/lib/apt/lists/*
 WORKDIR /app
+# Copy requirement list first so the dependency layer below stays cached
+# until requirements.txt itself changes
 COPY requirements.txt .
+# Install Python dependencies (llama-cpp-python compiled with BLAS disabled for HF CPU Spaces)
+# NOTE(review): newer llama.cpp / llama-cpp-python releases renamed the LLAMA_* CMake
+# options to GGML_* -- confirm these flags against the version pinned in requirements.txt
+RUN CMAKE_ARGS="-DLLAMA_BLAS=OFF -DLLAMA_CUBLAS=OFF" \
+    pip install --no-cache-dir -r requirements.txt
+# Pre-download model at build time to speed up startup.
+# This runs BEFORE copying the app source so that editing application files does not
+# invalidate this layer and force the model to be downloaded again.
+# The model is stored in the Hugging Face cache layout under /app/models;
+# local_dir_use_symlinks is intentionally not passed: it only applied together with
+# local_dir and is deprecated in recent huggingface_hub releases.
+RUN python -c "from huggingface_hub import hf_hub_download; \
+    hf_hub_download(repo_id='bartowski/Llama-3.2-3B-Instruct-GGUF', \
+    filename='Llama-3.2-3B-Instruct-Q4_K_M.gguf', \
+    cache_dir='/app/models')"
+# Copy app files last: they change most often
 COPY . .
+# Document the port the server listens on (EXPOSE does not publish it)
+EXPOSE 7860
 CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]