Initial vLLM deploy
- Dockerfile +16 -0
- app.py +12 -0
- requirement.txt +1 -0
Dockerfile
ADDED
@@ -0,0 +1,16 @@
+FROM nvidia/cuda:12.1.0-cudnn8-runtime-ubuntu22.04
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    python3 python3-pip git && \
+    ln -s /usr/bin/python3 /usr/bin/python
+
+# Install vLLM
+RUN pip install --upgrade pip
+RUN pip install vllm
+
+# Add the app script
+COPY app.py /app.py
+
+# Launch the vLLM server
+CMD ["python", "/app.py"]
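To try the image outside of Spaces, a minimal build-and-run sketch (assuming a local NVIDIA GPU with the NVIDIA Container Toolkit installed; the vllm-space tag is arbitrary):

$ docker build -t vllm-space .
$ docker run --gpus all -p 7860:7860 vllm-space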
app.py
ADDED
@@ -0,0 +1,12 @@
+import subprocess
+
+# Example with a lightweight model (adapt as needed); vLLM loads HF-format weights, not GGUF files
+model = "mistralai/Mistral-7B-Instruct-v0.1"
+
+# Launch vLLM in API server mode
+subprocess.run([
+    "python3", "-m", "vllm.entrypoints.api_server",
+    "--model", model,
+    "--host", "0.0.0.0",
+    "--port", "7860"
+])
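Once the container is up, the server can be smoke-tested from the host. This assumes the /generate route of vLLM's demo api_server (the OpenAI-compatible server lives at vllm.entrypoints.openai.api_server instead); the prompt and max_tokens values are illustrative:

$ curl http://localhost:7860/generate \
    -H "Content-Type: application/json" \
    -d '{"prompt": "Hello, my name is", "max_tokens": 32}'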
requirement.txt
ADDED
@@ -0,0 +1 @@
+vllm