# Dockerfile for a Hugging Face Space serving a llama.cpp-backed app via uvicorn.
FROM python:3.10-slim

# System toolchain required to compile llama-cpp-python from source
# (it builds llama.cpp via CMake during pip install).
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        git \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Copy the requirement list before the rest of the source so the
# dependency layer stays cached until requirements.txt itself changes.
COPY requirements.txt .

# Install Python dependencies. llama-cpp-python is compiled with BLAS
# and cuBLAS disabled — HF CPU Spaces have no GPU and no BLAS libs.
RUN CMAKE_ARGS="-DLLAMA_BLAS=OFF -DLLAMA_CUBLAS=OFF" \
    pip install --no-cache-dir -r requirements.txt

# Copy app files (keep .git, venvs, model caches, etc. out via .dockerignore).
COPY . .

# Pre-download the GGUF model at build time so container startup is fast
# and does not depend on network availability at runtime.
RUN python -c "from huggingface_hub import hf_hub_download; \
hf_hub_download(repo_id='bartowski/Llama-3.2-3B-Instruct-GGUF', \
filename='Llama-3.2-3B-Instruct-Q4_K_M.gguf', \
cache_dir='/app/models', local_dir_use_symlinks=False)"

# Run as the non-root uid-1000 user Hugging Face Docker Spaces expect;
# chown /app so the app can read the pre-downloaded model cache.
RUN useradd -m -u 1000 user && chown -R user:user /app
USER user
ENV HOME=/home/user

# Documentation only: HF Spaces route traffic to port 7860.
EXPOSE 7860
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]