#!/bin/bash
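# Start the NeMo Skills model server and the HTTP endpoint that fronts it,
# then keep both running in the background and wait on them.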

set -e

# Location of the model weights; defaults to /repository.
export MODEL_PATH=${MODEL_PATH:-"/repository"}

echo "Starting NeMo Skills inference endpoint..."
echo "Model path: $MODEL_PATH"

# On exit, terminate any remaining background jobs (model server and HTTP endpoint).
cleanup() {
    echo "Cleaning up processes..."
    kill $(jobs -p) 2>/dev/null || true
    wait
}
trap cleanup EXIT

# Launch the vLLM model server on 2 GPUs, with the NeMo Skills sandbox enabled,
# as a background job.
echo "Starting model server..."
ns start_server \
    --model="$MODEL_PATH" \
    --server_gpus=2 \
    --server_type=vllm \
    --with_sandbox &

# Launch the HTTP endpoint that serves requests on port 80, also in the background.
echo "Starting HTTP endpoint on port 80..."
python /usr/local/endpoint/server.py &
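
# Optional readiness probe: a minimal sketch that logs once the model server
# starts answering health checks. The URL is an assumption -- it presumes the
# server listens on localhost:5000 and exposes a /health route, and that curl
# is available in the image; adjust SERVER_HEALTH_URL or remove this block if
# your configuration differs.
SERVER_HEALTH_URL=${SERVER_HEALTH_URL:-"http://127.0.0.1:5000/health"}
for _ in $(seq 1 60); do
    if curl -sf "$SERVER_HEALTH_URL" >/dev/null 2>&1; then
        echo "Model server is ready."
        break
    fi
    sleep 5
done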

# Block until both background jobs exit; the EXIT trap then runs cleanup.
echo "Both servers started. Waiting..."
wait