aboutsummaryrefslogtreecommitdiff
path: root/distributed.sh
diff options
context:
space:
mode:
Diffstat (limited to 'distributed.sh')
-rwxr-xr-x distributed.sh 34
1 files changed, 0 insertions, 34 deletions
diff --git a/distributed.sh b/distributed.sh
deleted file mode 100755
index 4949159..0000000
--- a/distributed.sh
+++ /dev/null
@@ -1,34 +0,0 @@
-#!/bin/bash
-
-#SBATCH --job-name=swr-teamprojekt
-#SBATCH --partition=a100
-#SBATCH --time=00:30:00
-
-### e.g. requesting 4 nodes with 1 GPU each gives 4 GPUs in total (WORLD_SIZE==4)
-### Note: the GPU count x in --gres=gpu:x should equal --ntasks-per-node
-#SBATCH --nodes=2
-#SBATCH --ntasks-per-node=1
-#SBATCH --gres=gpu:a100:1
-#SBATCH --cpus-per-task=8
-#SBATCH --mem=64gb
-#SBATCH --chdir=/mnt/lustre/mladm/mfa252/SWR2-cool-projekt-main/
-#SBATCH --output=/mnt/lustre/mladm/mfa252/%x-%j.out
-
-### Change the 5-digit MASTER_PORT as you wish; Slurm will raise an error if the port is already used by others
-### Set WORLD_SIZE to (GPUs per node) * (number of nodes)
-export MASTER_PORT=18120
-export WORLD_SIZE=2
-
-### Use the first node's hostname as the master address - customized for vgg slurm
-### e.g. master(gnodee[2-5],gnoded1) == gnodee2
-echo "NODELIST="${SLURM_NODELIST}
-master_addr=$(scontrol show hostnames "$SLURM_JOB_NODELIST" | head -n 1)
-export MASTER_ADDR=$master_addr
-echo "MASTER_ADDR="$MASTER_ADDR
-
-export NCCL_DEBUG="INFO"
-
-source venv/bin/activate
-
-### the command to run
-srun ./train.sh