#!/bin/bash
#################################################
## TEMPLATE VERSION 1.01                       ##
#################################################
## ALL SBATCH COMMANDS WILL START WITH #SBATCH ##
## DO NOT REMOVE THE # SYMBOL                  ## 
#################################################
#SBATCH --nodes=1                   # How many nodes required? Usually 1
#SBATCH --cpus-per-task=10           # Number of CPU to request for the job
#SBATCH --mem=128GB                   # How much memory does your job require?
#SBATCH --gres=gpu:1                # Do you require GPUS? If not delete this line
#SBATCH --time=05-00:00:00          # How long to run the job for? Jobs exceeding this time will be terminated
                                    # Format <DD-HH:MM:SS> eg. 5 days 05-00:00:00
                                    # Format <DD-HH:MM:SS> eg. 24 hours 1-00:00:00 or 24:00:00
#SBATCH --mail-type=BEGIN,END,FAIL  # When should you receive an email?
#SBATCH --output=%u.%j.out          # Where should the log files go?
                                    # You must provide an absolute path eg /common/home/module/username/
                                    # If no paths are provided, the output file will be placed in your current working directory
#SBATCH --requeue                   # Remove if you do not want the workload scheduler to requeue your job after preemption
#SBATCH --constraint=l40		# This tells the workload scheduler to provision you l40 nodes 
################################################################
## EDIT AFTER THIS LINE IF YOU ARE OKAY WITH DEFAULT SETTINGS ##
################################################################
# ================ Account parameters ================
# Description			| Value
# ---------------------------------------------
# Account name                    | tanahhweeresearch
# List of Assigned Partition      | researchlong researchshort tanahhweeresearch
# List of Assigned QOS            | research-1-qos tanahhweeresearch-priority
# ---------------------------------------------
#SBATCH --partition=researchlong                 # The partition you've been assigned
#SBATCH --account=tanahhweeresearch   # The account you've been assigned (normally student)
#SBATCH --qos=research-1-qos       # What is the QOS assigned to you? Check with myinfo command
#SBATCH --mail-user=your.email@example.com # Who should receive the email notifications (replace with your own address)
#SBATCH --job-name=1GPU_LLM_HT # Give the job a name
#################################################
##            END OF SBATCH COMMANDS           ##
#################################################
# Purge the environment, load the modules we require.
# Refer to https://violet.smu.edu.sg/origami/module/ for more information
# Unload every currently loaded module before loading the ones this job needs.
module purge
# Stop immediately if a required module cannot be loaded; otherwise the job
# would carry on and fail later with a much less obvious error.
module load Anaconda3/2022.05 || { echo "ERROR: failed to load module Anaconda3/2022.05" >&2; exit 1; }
module load CUDA/12.1.1 || { echo "ERROR: failed to load module CUDA/12.1.1" >&2; exit 1; }
# Do not remove this line even if you have executed conda init
eval "$(conda shell.bash hook)"
# Creating a virtual environment can be commented out if you already have one
# conda create -n llm_ht python=3.11
# This command assumes that you've already created the environment previously
# We're using an absolute path here. You may use a relative path, as long as SRUN is executed in the same working directory
# conda activate tgi
# Abort if the environment cannot be activated, so the workload is never
# launched with the wrong Python environment.
conda activate llm_ht || { echo "ERROR: failed to activate conda environment llm_ht" >&2; exit 1; }
# If you require any packages, install them before the srun job submission.
# conda install pytorch torchvision torchaudio pytorch-cuda=12.1 -c pytorch -c nvidia
# Submit your job to the cluster
# Directory that holds the job scripts this wrapper launches.
BASEDIR=$HOME/logical-reasoning/scripts
# $1 is the file name of the job script, relative to BASEDIR. It is required:
# without it srun would be handed the bare scripts directory and fail.
JOB=${1:?"usage: sbatch 1gpu_llm_ht.sh JOB_SCRIPT [JOB_ARGS...]"}
# Report a missing script on stderr and stop before asking srun to run it.
if [[ ! -f "$BASEDIR/$JOB" ]]; then
  echo "ERROR: job script not found: $BASEDIR/$JOB" >&2
  exit 1
fi
echo "Submitting job: $BASEDIR/$JOB"
# The path is quoted so it is never word-split or glob-expanded. Any arguments
# after the script name are forwarded unchanged to the job script.
srun --gres=gpu:1 "$BASEDIR/$JOB" "${@:2}"
# Example submission:
#   sbatch logical-reasoning/scripts/1gpu_llm_ht.sh tune-mgtv-qwen2_7b.sh