slurm_job.sh (from tu-studio/hpc-cluster-ml-workflow)
#!/bin/bash
# Copyright 2024 tu-studio
# This file is licensed under the Apache License, Version 2.0.
# See the LICENSE file in the root of this project for details.
#SBATCH -J tustu
#SBATCH --ntasks=1
#SBATCH --nodes=1
#SBATCH --ntasks-per-core=1
#SBATCH --cpus-per-task=1
#SBATCH --gres=gpu:tesla:1
#SBATCH --mem=100GB
#SBATCH --time=40:00:00
#SBATCH --partition=gpu
#SBATCH --output=./logs/slurm/slurm-%j.out
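#
# Note: Slurm does not create the directory for the --output file, so
# ./logs/slurm must exist in the submission directory before the job runs
# (e.g. create it once with `mkdir -p logs/slurm`).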
# Load necessary modules
module load singularity/4.0.2
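# The Singularity module name and version are cluster-specific (an assumption
# here); check what is available on your cluster with e.g. `module avail singularity`.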
# Set environment variables defined in global.env
set -o allexport
source global.env
set +o allexport
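# The contents of global.env are project-specific. An illustrative sketch,
# covering only the variables referenced below (values are placeholders):
#   TUSTU_PROJECT_NAME=<your-project-name>
#   TUSTU_DOCKERHUB_USERNAME=<your-dockerhub-username>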
# Define DEFAULT_DIR in the host environment
export DEFAULT_DIR="$PWD"
# Remove the previous Singularity image if it exists
if [ -f "$TUSTU_PROJECT_NAME-image_latest.sif" ]; then
  rm "$TUSTU_PROJECT_NAME-image_latest.sif"
fi
# Pull the latest Docker image from Docker Hub and convert it to a Singularity
# image; cached layers are reused if nothing has changed. By default the pull
# writes the image to $TUSTU_PROJECT_NAME-image_latest.sif.
singularity pull docker://$TUSTU_DOCKERHUB_USERNAME/$TUSTU_PROJECT_NAME-image:latest
echo "Starting singularity execution..."
# Run the workflow inside the Singularity container with GPU support (--nv),
# binding the current project directory into the container
singularity exec --nv --bind "$DEFAULT_DIR" "$TUSTU_PROJECT_NAME-image_latest.sif" ./exp_workflow.sh
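
# Usage sketch (assumed workflow; run from the project root on the cluster):
#   mkdir -p logs/slurm                      # create the Slurm log directory once
#   sbatch slurm_job.sh                      # submit the job
#   squeue -u "$USER"                        # check queue status
#   tail -f logs/slurm/slurm-<jobid>.out     # follow the job output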