#!/bin/bash
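# Start the NeMo Skills model server and the HTTP endpoint that fronts it,
# then keep both running in the background and wait on them.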

set -e

# Location of the model weights; defaults to /repository.
export MODEL_PATH=${MODEL_PATH:-"/repository"}

echo "Starting NeMo Skills inference endpoint..."
echo "Model path: $MODEL_PATH"

# On exit, terminate any remaining background jobs (model server and HTTP endpoint).
cleanup() {
    echo "Cleaning up processes..."
    kill $(jobs -p) 2>/dev/null || true
    wait
}
trap cleanup EXIT

# Launch the vLLM model server on 2 GPUs, with the NeMo Skills sandbox enabled,
# as a background job.
echo "Starting model server..."
ns start_server \
    --model="$MODEL_PATH" \
    --server_gpus=2 \
    --server_type=vllm \
    --with_sandbox &

# Launch the HTTP endpoint that serves requests on port 80, also in the background.
echo "Starting HTTP endpoint on port 80..."
python /usr/local/endpoint/server.py &
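
# Optional readiness probe: a minimal sketch that logs once the model server
# starts answering health checks. The URL is an assumption -- it presumes the
# server listens on localhost:5000 and exposes a /health route, and that curl
# is available in the image; adjust SERVER_HEALTH_URL or remove this block if
# your configuration differs.
SERVER_HEALTH_URL=${SERVER_HEALTH_URL:-"http://127.0.0.1:5000/health"}
for _ in $(seq 1 60); do
    if curl -sf "$SERVER_HEALTH_URL" >/dev/null 2>&1; then
        echo "Model server is ready."
        break
    fi
    sleep 5
done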

# Block until both background jobs exit; the EXIT trap then runs cleanup.
echo "Both servers started. Waiting..."
wait