Spaces:

numind
/

NuMarkdown-8B-Thinking

Running on L40S

App Files Community

liamcripwell committed on Aug 8

Commit

ebea37f

verified ·

1 Parent(s): 2a3e019

Update start.sh

Browse files

Files changed (1) hide show

start.sh +49 -17

start.sh CHANGED Viewed

@@ -1,36 +1,68 @@
 #!/bin/bash
-echo "Starting vLLM server..."
-# Start vLLM in the background with logging
 python3 -m vllm.entrypoints.openai.api_server \
     --model numind/NuMarkdown-8B-Thinking \
     --port 8000 \
     --host 0.0.0.0 \
-    --max-model-len 20000 \
-    --gpu-memory-utilization 0.95 > $HOME/app/vllm.log 2>&1 &
 VLLM_PID=$!
 echo "vLLM started with PID: $VLLM_PID"
-# Wait for vLLM to be ready
-echo "Waiting for vLLM server to start..."
-for i in {1..90}; do
-    if curl -s http://localhost:8000/v1/models > /dev/null; then
-        echo "vLLM server is ready!"
         break
     fi
-    echo "Waiting... ($i/90)"
     sleep 2
 done
-# Check if vLLM is actually running
 if ! curl -s http://localhost:8000/v1/models > /dev/null; then
-    echo "ERROR: vLLM server failed to start!"
-    echo "vLLM logs:"
-    cat $HOME/app/vllm.log
     exit 1
 fi
-echo "Starting Gradio app..."
-# Start Gradio app in the foreground
-python3 $HOME/app/app.py

 #!/bin/bash
+echo "=== Starting NuMarkdown-8B-Thinking Space ==="
+echo "Starting vLLM server with optimized settings..."
+# Start vLLM with HF Spaces optimizations
 python3 -m vllm.entrypoints.openai.api_server \
     --model numind/NuMarkdown-8B-Thinking \
     --port 8000 \
     --host 0.0.0.0 \
+    --max-model-len 8048 \
+    --gpu-memory-utilization 0.9 \
+    --disable-log-requests \
+    --tensor-parallel-size 1 \
+    --trust-remote-code > $HOME/app/vllm.log 2>&1 &
 VLLM_PID=$!
 echo "vLLM started with PID: $VLLM_PID"
+# More aggressive waiting with health checks
+echo "Waiting for vLLM server to start (this may take 5-10 minutes)..."
+for i in {1..180}; do  # Wait up to 6 minutes
+    if curl -s --connect-timeout 5 http://localhost:8000/health > /dev/null 2>&1; then
+        echo "✓ vLLM health check passed!"
+        break
+    elif curl -s --connect-timeout 5 http://localhost:8000/v1/models > /dev/null 2>&1; then
+        echo "✓ vLLM server is ready!"
         break
     fi
+    # Show progress every 10 iterations (roughly every 20 seconds, given the 2-second sleep)
+    if [ $((i % 10)) -eq 0 ]; then
+        echo "Still waiting... ($i/180) - checking vLLM process"
+        if ! ps -p $VLLM_PID > /dev/null; then
+            echo "❌ vLLM process died! Checking logs:"
+            tail -20 $HOME/app/vllm.log
+            exit 1
+        fi
+    fi
     sleep 2
 done
+# Final check
 if ! curl -s http://localhost:8000/v1/models > /dev/null; then
+    echo "❌ vLLM server failed to start after 6 minutes!"
+    echo "Last 50 lines of vLLM logs:"
+    tail -50 $HOME/app/vllm.log
     exit 1
 fi
+echo "✅ vLLM server is ready!"
+echo "=== Starting Gradio App ==="
+echo "Port 7860 status before launching Gradio:"
+netstat -tuln | grep :7860 || echo "Port 7860 is free"
+echo "Environment check:"
+echo "PORT=${PORT:-7860}"
+echo "PWD=$(pwd)"
+echo "USER=$(whoami)"
+# Launch Gradio with explicit error handling
+echo "Launching Gradio..."
+python3 $HOME/app/app.py || {
+    echo "❌ Gradio failed to start!"
+    echo "Checking if port is in use:"
+    netstat -tuln | grep :7860
+    exit 1
+}