#!/bin/bash
#################################################
##            TEMPLATE VERSION 1.01            ##
#################################################
## ALL SBATCH COMMANDS WILL START WITH #SBATCH ##
##          DO NOT REMOVE THE # SYMBOL         ##
#################################################
#SBATCH --nodes=1                   # How many nodes required? Usually 1
#SBATCH --cpus-per-task=10          # Number of CPUs to request for the job
#SBATCH --mem=128GB                 # How much memory does your job require?
#SBATCH --gres=gpu:1                # Do you require GPUs? If not, delete this line
#SBATCH --time=05-00:00:00          # How long to run the job for? Jobs exceeding this time will be terminated
                                    # Format <DD-HH:MM:SS>, e.g. 5 days: 05-00:00:00
                                    # Format <DD-HH:MM:SS>, e.g. 24 hours: 1-00:00:00 or 24:00:00
#SBATCH --mail-type=BEGIN,END,FAIL  # When should you receive an email?
#SBATCH --output=%u.%j.out          # Where should the log files go?
                                    # You must provide an absolute path, e.g. /common/home/module/username/
                                    # If no path is provided, the output file will be placed in your current working directory
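                                    # Note: in SLURM filename patterns, %u expands to your
                                    # username and %j to the job ID, so each run gets its own log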
#SBATCH --requeue                   # Remove this if you do not want the workload scheduler to requeue your job after preemption
#SBATCH --constraint=l40            # This tells the workload scheduler to provision L40 nodes for you
################################################################
## EDIT AFTER THIS LINE IF YOU ARE OKAY WITH DEFAULT SETTINGS ##
################################################################

# ================= Account parameters =================
# Description                 | Value
# ------------------------------------------------------
# Account name                | tanahhweeresearch
# List of Assigned Partitions | researchlong researchshort tanahhweeresearch
# List of Assigned QOS        | research-1-qos tanahhweeresearch-priority
# ------------------------------------------------------
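# A hedged sketch: besides the cluster's own myinfo command, standard SLURM
# accounting (if enabled on this cluster) can list your associations directly:
#   sacctmgr show assoc user=$USER format=Account,Partition,QOS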
#SBATCH --partition=researchlong    # The partition you've been assigned
#SBATCH --account=tanahhweeresearch # The account you've been assigned (normally student)
#SBATCH --qos=research-1-qos        # What is the QOS assigned to you? Check with the myinfo command
#SBATCH [email protected] # Who should receive the email notifications
#SBATCH --job-name=1GPU_LLM_HT      # Give the job a name

#################################################
##            END OF SBATCH COMMANDS           ##
#################################################

# Purge the environment, load the modules we require.
# Refer to https://violet.smu.edu.sg/origami/module/ for more information
module purge
module load Anaconda3/2022.05
module load CUDA/12.1.1
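# Optionally confirm what was loaded before proceeding:
# module list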
# Do not remove this line even if you have executed conda init:
# batch jobs run in non-interactive shells that do not source your shell
# startup files, so conda's shell functions must be initialised explicitly here
eval "$(conda shell.bash hook)"
# Create a virtual environment; leave this commented out if you already have one
# conda create -n llm_ht python=3.11

# This command assumes that you've already created the environment previously
# We're using an absolute path here. You may use a relative path, as long as srun is executed from the same working directory
# conda activate tgi
conda activate llm_ht

# If you require any packages, install them before the srun job submission.
# conda install pytorch torchvision torchaudio pytorch-cuda=12.1 -c pytorch -c nvidia
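# Optional sanity check (a sketch; assumes PyTorch was installed into the
# environment as above): confirm the job can actually see the requested GPU
# python -c "import torch; print(torch.cuda.is_available(), torch.cuda.device_count())"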
# Submit your job to the cluster
BASEDIR=$HOME/logical-reasoning/scripts
JOB=$1
echo "Submitting job: $BASEDIR/$JOB"
srun --gres=gpu:1 "$BASEDIR/$JOB"

# Usage example:
# sbatch logical-reasoning/scripts/1gpu_llm_ht.sh tune-mgtv-qwen2_7b.sh
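# Once sbatch returns a job ID, standard SLURM commands can track the job
# (run these from a login node, not inside this script):
#   squeue -u $USER                  # list your pending/running jobs
#   scontrol show job <jobid>        # detailed state of one job
#   tail -f <username>.<jobid>.out   # follow the log file named by --output above
#   scancel <jobid>                  # cancel the job if needed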