Spaces:
				
			
			
	
			
			
		Sleeping
		
	
	
	
			
			
	
	
	
	
		
		
		Sleeping
		
# Base image: slim variant of the official Python 3.11 image
FROM python:3.11-slim

# All relative paths below (COPY destinations, the uvicorn module path)
# resolve against /app. WORKDIR creates the directory if it is missing.
WORKDIR /app

# System packages: a C/C++ toolchain, curl, and git.
# NOTE(review): no CUDA toolkit is installed by this step. If GPU-enabled
# bitsandbytes is required, it has to come from the Python wheels or from a
# CUDA-enabled base image -- confirm which one is intended.
# ca-certificates is listed explicitly so HTTPS keeps working for curl/git
# even though recommended packages are not pulled in.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        git \
    && rm -rf /var/lib/apt/lists/*

# Create the non-root user BEFORE any directory under /home/user exists, so
# that `useradd -m` creates the home directory itself. Then create the
# directories the application writes to and hand all of them to that user.
# Ownership is sufficient for write access, so no world-writable (777)
# permissions are granted.
RUN useradd -m -u 1000 user \
    && mkdir -p \
        /app/logs \
        /app/offload \
        /app/src/static \
        /home/user/.cache/huggingface \
        /home/user/.local \
    && chown -R user:user /app /home/user

# Python / runtime configuration.
#   PYTHONPATH              - makes packages under /app importable
#   PYTHONUNBUFFERED        - stdout/stderr are written without buffering
#   PYTHONDONTWRITEBYTECODE - do not write .pyc files
#   PORT                    - exported for the app; note that CMD below
#                             hard-codes 7860 rather than reading this value
#   PATH                    - exposes scripts installed by `pip install --user`
#   HF_HOME                 - Hugging Face cache location (created above)
ENV PYTHONPATH=/app \
    PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PORT=7860 \
    PATH="/home/user/.local/bin:${PATH}" \
    HF_HOME=/home/user/.cache/huggingface

# glibc malloc tuning to limit memory growth: fewer arenas, and lower
# trim/mmap thresholds. The trailing underscores are part of the variable
# names.
ENV MALLOC_ARENA_MAX=2 \
    MALLOC_TRIM_THRESHOLD_=100000 \
    MALLOC_MMAP_THRESHOLD_=100000

# Restrict numeric libraries to a single thread each.
ENV OMP_NUM_THREADS=1 \
    MKL_NUM_THREADS=1 \
    NUMEXPR_NUM_THREADS=1

# Explicitly keep Hugging Face Hub / Transformers in online mode.
ENV HF_HUB_OFFLINE=0 \
    TRANSFORMERS_OFFLINE=0

# Everything from here on (pip installs, runtime) runs as the non-root user.
USER user

# Upgrade pip, then install a NumPy 1.x release before the rest of the
# requirements are resolved.
RUN pip install --user --no-cache-dir --upgrade pip
RUN pip install --user --no-cache-dir "numpy<2.0.0"

# Copy only the dependency manifest first so the install layer below stays
# cached until requirements.txt itself changes.
COPY --chown=user:user requirements.txt ./

RUN pip install --user --no-cache-dir -r requirements.txt

# Copy the application source last; it changes most often.
COPY --chown=user:user . .

# Port the server listens on (documentation only; it does not publish it).
EXPOSE 7860

# Start uvicorn serving src.api:app with a single worker, at most one
# in-flight connection, and a 120 s keep-alive timeout.
CMD ["python", "-u", "-m", "uvicorn", "src.api:app", "--host", "0.0.0.0", "--port", "7860", "--log-level", "debug", "--workers", "1", "--limit-concurrency", "1", "--timeout-keep-alive", "120"]