fix dockerfile and requirements
- Dockerfile +35 -11
- app.py +10 -5
- requirements.txt +8 -2
Dockerfile
CHANGED
@@ -1,17 +1,41 @@
-#
+# Base image with Python and CUDA for GPU support
+FROM nvidia/cuda:12.1.1-cudnn8-runtime-ubuntu22.04
 
+# Install system dependencies
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+    python3.10 \
+    python3-pip \
+    python3.10-venv \
+    git \
+    libgl1 \
+    libglib2.0-0 && \
+    rm -rf /var/lib/apt/lists/*
 
-ENV PATH="/
+# Create and activate virtual environment
+RUN python3.10 -m venv /opt/venv
+ENV PATH="/opt/venv/bin:$PATH"
 
+# Install Python dependencies first (for better caching)
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
 
-RUN pip install --no-cache-dir
+# Install PyTorch with CUDA support
+RUN pip install --no-cache-dir \
+    torch==2.1.2+cu121 \
+    torchvision==0.16.2+cu121 \
+    --extra-index-url https://download.pytorch.org/whl/cu121
 
+# Copy entire application (including model.py)
+COPY . .
 
+# Special model loading step
+RUN python3 -c "\
+from model import QwenClassifier; \
+QwenClassifier.from_pretrained('KeivanR/Qwen2.5-1.5B-Instruct-MLB-clf_lora-1743189446'); \
+print('Model loaded successfully')\
+"
+
+# Run FastAPI app
+EXPOSE 7860
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
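The "special model loading step" above fetches the checkpoint at build time, baking the weights into the image so a cold start does not have to download them. A minimal sketch of an equivalent preload using the stock huggingface_hub API (an assumption for illustration; the commit itself goes through the repo's own model.QwenClassifier wrapper):

# Hypothetical build-time preload, equivalent in effect to the Dockerfile's
# RUN python3 -c step: pull the repo snapshot into the local HF cache so
# that from_pretrained() resolves from disk at container startup.
from huggingface_hub import snapshot_download

snapshot_download("KeivanR/Qwen2.5-1.5B-Instruct-MLB-clf_lora-1743189446")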
app.py
CHANGED
@@ -1,9 +1,14 @@
-from transformers import pipeline
 from fastapi import FastAPI
+from qwen_classifier.predict import predict_single  # Your existing function
+import torch
 
-app = FastAPI()
-
+app = FastAPI(title="Qwen Classifier")
+
+@app.on_event("startup")
+async def load_model():
+    # Warm up GPU
+    torch.zeros(1).cuda()
 
 @app.post("/predict")
-def predict(text: str):
-    return
+async def predict(text: str):
+    return predict_single(text, backend="local")
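Two notes on the new app.py. First, the startup warm-up calls .cuda() unconditionally, which raises on CPU-only hardware; a guarded variant would be (a sketch, not what the commit ships):

import torch

# Warm up the GPU only when one is present; the unguarded
# torch.zeros(1).cuda() raises a RuntimeError on CPU-only machines.
if torch.cuda.is_available():
    torch.zeros(1).cuda()

Second, because text is declared as a bare str, FastAPI reads it from the query string rather than the JSON body. A minimal client call against a locally running container (assuming port 7860 from the Dockerfile's EXPOSE/CMD):

import requests

# The bare-str signature makes 'text' a query parameter,
# so it goes in params=, not json=.
resp = requests.post(
    "http://localhost:7860/predict",
    params={"text": "Example problem statement to classify"},
)
print(resp.json())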
requirements.txt
CHANGED
@@ -1,2 +1,8 @@
-fastapi
-uvicorn
+fastapi>=0.95.0
+uvicorn>=0.22.0
+transformers>=4.36.0
+accelerate>=0.24.0
+huggingface-hub>=0.19.0
+torch>=2.1.0
+sentencepiece>=0.1.99
+auto-gptq>=0.5.0  # If using quantized model
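Note that the Dockerfile later installs torch==2.1.2+cu121, so the loose torch>=2.1.0 pin here is superseded by the CUDA wheel in the final image. A quick sanity check of which build actually ended up installed (a sketch; the second line prints False on CPU-only hardware):

import torch

# Reports the resolved torch build and whether a CUDA device is reachable;
# inside this image it should show 2.1.2+cu121.
print(torch.__version__)
print(torch.cuda.is_available())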
|