Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
faressc authored Jul 25, 2024
0 parents commit 05250aa
Show file tree
Hide file tree
Showing 26 changed files with 948 additions and 0 deletions.
4 changes: 4 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Ignore all files in this directory
**
# Except this file
!requirements.txt
3 changes: 3 additions & 0 deletions .dvc/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
/config.local
/tmp
/cache
8 changes: 8 additions & 0 deletions .dvc/config
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
[core]
remote = myremote
[cache]
shared = group
type = symlink
['remote "myremote"']
url = webdavs://tubcloud.tu-berlin.de/remote.php/dav/files/cf531c5e-2043-103b-8745-111da40a61ee/dvcR
timeout = 600
3 changes: 3 additions & 0 deletions .dvcignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Add patterns of files dvc should ignore, which could improve
# the performance. Learn more at
# https://dvc.org/doc/user-guide/dvcignore
45 changes: 45 additions & 0 deletions .github/workflows/docker-image.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Builds the project's Docker image whenever the Dockerfile, the Python
# requirements or this workflow change. The image is only pushed to Docker Hub
# for `push` events; pull requests just verify that the image still builds.
name: Docker Image CI

on:
  push:
    paths:
      - 'Dockerfile'
      - 'requirements.txt'
      - '.github/workflows/docker-image.yml'
  pull_request:
    paths:
      - 'Dockerfile'
      - 'requirements.txt'
      - '.github/workflows/docker-image.yml'

jobs:
  build:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      # Export every non-comment KEY=VALUE line of global.env (skipping entries
      # with an empty key or value) so later steps can read them via `env.*`.
      - name: Load Environment Variables from global.env
        run: |
          grep -v '^#' global.env | grep '=' | while IFS='=' read -r key value; do
            if [[ -n "$key" && -n "$value" ]]; then
              echo "$key=$value" >> "$GITHUB_ENV"
            fi
          done

      # Credentials are only needed when the image is actually pushed.
      - name: Login to DockerHub
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_PASSWORD }}

      - name: Build and push
        uses: docker/build-push-action@v6
        with:
          context: .
          file: ./Dockerfile
          # Never overwrite the published `latest` tag from an unmerged pull request.
          push: ${{ github.event_name != 'pull_request' }}
          tags: ${{ secrets.DOCKER_USERNAME }}/${{ env.TUSTU_PROJECT_NAME }}-image:latest
25 changes: 25 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Ignore typical temporary and system files
.DS_Store
__pycache__/

# Ignore Python virtual environments
myenv
venv

# Ignore data files
data/*/*
logs/*/*
/dvclive
/models
/temp
/exp-logs/*

# Not ignore
!logs/*/.gitkeep
!*.dvc

# Ignore personal notes
NOTES.md

# Ignore singularity image
ml-pipeline-image_latest.sif
47 changes: 47 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Use an official Debian runtime as a parent image
FROM debian:11-slim

# CPython release that is built from source below. Override at build time with
# `--build-arg PYTHON_VERSION=X.Y.Z` (final releases only: the symlink step
# derives the `X.Y` interpreter name by stripping the last version component).
ARG PYTHON_VERSION=3.12.4

# Install the toolchain and headers needed to compile CPython, plus the tools
# used by the pipeline. ca-certificates is listed explicitly because
# --no-install-recommends would otherwise leave the image without a CA bundle,
# which breaks TLS verification for wget/curl/git.
# NOTE(review): python3-pip pulls in Debian's own python3; the pip used further
# down is the one invoked through the source-built interpreter - confirm
# whether the apt package is still required.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        git \
        libbz2-dev \
        libffi-dev \
        libgdbm-dev \
        libncurses5-dev \
        libreadline-dev \
        libsqlite3-dev \
        libssl-dev \
        openssh-client \
        python3-pip \
        rsync \
        wget \
        zlib1g-dev \
    # Remove apt cache
    && rm -rf /var/lib/apt/lists/*

# Build and install the requested Python version from source.
# The download is verified against the system CA bundle (no --no-check-certificate).
RUN wget "https://www.python.org/ftp/python/${PYTHON_VERSION}/Python-${PYTHON_VERSION}.tgz" \
    && tar -xf "Python-${PYTHON_VERSION}.tgz" \
    && cd "Python-${PYTHON_VERSION}" \
    && ./configure --enable-optimizations \
    && make -j"$(nproc)" \
    # altinstall installs pythonX.Y only and does not create python3 in /usr/local/bin
    && make altinstall \
    && cd .. \
    # Delete the unzipped directory and downloaded archive in the same layer to save space
    && rm -rf "Python-${PYTHON_VERSION}" "Python-${PYTHON_VERSION}.tgz" \
    # Create symlink for python3 (e.g. 3.12.4 -> /usr/local/bin/python3.12)
    && ln -s "/usr/local/bin/python${PYTHON_VERSION%.*}" /usr/local/bin/python3

# Set the working directory
WORKDIR /home/app

# Copy the python requirements list to /home/app and install them.
# --no-cache-dir keeps pip's download cache out of the image layer.
COPY requirements.txt .
RUN python3 -m pip install --no-cache-dir -r requirements.txt \
    && rm requirements.txt




70 changes: 70 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# ml-training-pipeline

This repository provides a comprehensive template for managing reproducible machine-learning training pipelines in the context of audio. The template uses [DVC](https://dvc.org/) (Data Version Control) and is tailored to experiments on the remote SLURM-managed [HPC cluster of the Technical University of Berlin](https://www.tu.berlin/campusmanagement/angebot/high-performance-computing-hpc).

## Features


## Install and Setup

```
git clone https://github.com/tu-studio/dataset-pipeline-template
```


Create and set up a virtual environment inside the repository. If you choose a name other than *venv* or *myenv*, make sure to add the directory name of your virtual environment to the .gitignore.


```
cd ml-training-pipeline
python3 -m venv venv
echo venv/ >> .gitignore
source venv/bin/activate
pip install -r requirements.txt
```


Initialise a DVC repository.

```
dvc init
```

Add a WebDAV server as remote storage to your dvc repository.

```
dvc remote add -d myremote webdavs://tubcloud.tu-berlin.de/remote.php/dav/files/cf531c5e-2043-103b-8745-111da40a61ee/DVC
```

Add your username and password for server access to a private config file (it will be ignored by git).

```
dvc remote modify --local myremote user 'yourusername'
dvc remote modify --local myremote password 'yourpassword'
dvc remote modify myremote ask_password true
```

Add the raw data folder to the dvc repository.

```
dvc add data/raw
```


## Usage



## Contributors

- [Michael Witte](https://github.com/michaelwitte)
- [Fares Schulz](https://github.com/faressc)

## License

3 changes: 3 additions & 0 deletions TODO.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# TODO

- Document in README.md that a global git config must be available, because we push to the repository. Singularity automatically mounts the $HOME directory, so the config stored there is used; this is not the case for Docker, so another solution has to be found.
74 changes: 74 additions & 0 deletions batchjob.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
#!/bin/bash

#SBATCH -J exp_job
#SBATCH --ntasks=1
#SBATCH --nodes=1
#SBATCH --ntasks-per-core=1
#SBATCH --cpus-per-task=1
#SBATCH --gres=gpu:tesla:1
#SBATCH --mem=100GB
#SBATCH --time=1:00:00
#SBATCH --partition=gpu
#SBATCH --output=./logs/slurm/slurm-%j.out

# SLURM batch job: pulls the project image, copies the tracked repository files
# into a per-experiment workspace (../$TUSTU_TEMP_PATH/$INDEX) and runs one DVC
# experiment there. Reads INDEX and EXP_PARAMS from the submitting environment.

# Load necessary modules
module load singularity/4.0.2

# Set environment variables defined in global.env
# (the command substitution is intentionally unquoted: one KEY=VALUE per word)
export $(grep -v '^#' global.env | xargs)

# Image file used for this project; derived from the project name instead of
# being hard-coded so that renaming the project in global.env keeps working.
SIF_IMAGE="$TUSTU_PROJECT_NAME-image_latest.sif"

# Remove the previous singularity image if it exists
if [ -f "$SIF_IMAGE" ]; then
    rm "$SIF_IMAGE"
fi
# Pull the latest docker image from Docker Hub and convert it to a singularity image. Using cached singularity image if nothing changed
singularity pull "docker://$TUSTU_DOCKERHUB_USERNAME/$TUSTU_PROJECT_NAME-image:latest"

echo "Starting singularity execution..."

# Run the singularity container
# NOTE: the script below is single-quoted, so it must not contain single quotes.
DEFAULT_DIR="$PWD" singularity exec --nv "$SIF_IMAGE" bash -c '
    echo "Checking directory existence..."
    if [ ! -d "../$TUSTU_TEMP_PATH" ]; then
        mkdir -p "../$TUSTU_TEMP_PATH"
        echo "The directory ../$TUSTU_TEMP_PATH has been created."
    else
        echo "The directory ../$TUSTU_TEMP_PATH exists."
    fi
    if [ -z "$INDEX" ]; then
        echo "Creating new index 0..."
        INDEX=0
    fi

    WORKSPACE="../$TUSTU_TEMP_PATH/$INDEX"
    # Start from a clean workspace: a failed earlier run with the same index
    # is no longer cleaned up automatically (see the end of this script).
    rm -rf "$WORKSPACE"
    mkdir -p "$WORKSPACE" || exit 1

    echo "Copying files..."
    {
        git ls-files;
        echo ".dvc/config.local";
        echo ".git";
    } | while IFS= read -r file; do
        cp -r --parents "$file" "$WORKSPACE/"
    done

    # Never continue in the original repository if the workspace is unusable,
    # otherwise the dvc commands and the final rm -rf would hit the wrong place.
    cd "$WORKSPACE" || exit 1

    echo "Setting DVC cache directory..."
    dvc cache dir "$DEFAULT_DIR/.dvc/cache"
    # dvc config cache.shared group
    # dvc config cache.type symlink
    echo "Pulling data with DVC..."
    dvc pull
    echo "Running experiment..."
    # EXP_PARAMS stays unquoted on purpose so it splits into separate arguments.
    # Every step is chained: push only after a successful run, and remove the
    # workspace only after a successful push, so failed experiments stay inspectable.
    dvc exp run $EXP_PARAMS &&
        echo "Pushing experiment..." &&
        dvc exp push origin &&
        echo "Cleaning up..." &&
        cd .. &&
        rm -rf "$INDEX"
'
2 changes: 2 additions & 0 deletions data/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
/raw
/processed
6 changes: 6 additions & 0 deletions data/raw.dvc
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
outs:
- md5: 258f409b9cb543c7cae3902ab7eb808f.dir
size: 65879160
nfiles: 2
hash: md5
path: raw
Empty file added docs/.gitkeep
Empty file.
51 changes: 51 additions & 0 deletions dvc.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# DVC pipeline definition: preprocess -> train -> export, plus a log-saving stage.
# The entries under `params` are parameter keys; the file that defines their
# values is not part of this view (presumably params.yaml - TODO confirm).
stages:
  # Runs source/preprocess.py on data/raw/ and produces data/processed/.
  preprocess:
    cmd: python3 source/preprocess.py
    deps:
      - source/preprocess.py
      - data/raw/
    params:
      - preprocess.input_file
      - preprocess.target_file
      - preprocess.input_size
      - preprocess.test_split
      - preprocess.output_dir
    outs:
      - data/processed/
  # Runs source/train.py on the preprocessed data; produces model checkpoints,
  # the dvclive/ directory and tensorboard logs.
  train:
    cmd: python3 source/train.py
    deps:
      - source/train.py
      - source/model.py
      - data/processed/
    params:
      - train.name
      - train.batch_size
      - train.epochs
      - train.train_mode
      - train.device
    outs:
      - models/checkpoints/
      - dvclive/
      - exp-logs/tensorboard/
  # Runs source/export.py on the checkpoints and produces models/exports/.
  export:
    cmd: python3 source/export.py
    deps:
      - source/export.py
      - models/checkpoints/
    params:
      - preprocess.input_size
      - train.name
      - train.train_mode
    outs:
      - models/exports/
  # Runs source/utils/save_logs.py, which is expected to produce exp-logs/slurm.
  # NOTE(review): this stage declares no deps - confirm it re-runs when intended.
  save_logs:
    cmd: python3 source/utils/save_logs.py
    outs:
      - exp-logs/slurm

# Metrics file and plot data located inside the dvclive/ output of the train stage.
metrics:
  - dvclive/metrics.json
plots:
  - dvclive/plots/metrics:
      x: step
21 changes: 21 additions & 0 deletions exec_experiment.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import itertools
import subprocess
import os

# Submit experiment for hyperparameter combination
def submit_batch_job(index, test_split, batch_size):
    """Submit ``batchjob.sh`` via ``sbatch`` for one hyperparameter combination.

    The dynamic parameters are handed to the batch job as environment
    variables: ``EXP_PARAMS`` carries the ``-S`` parameter overrides and
    ``INDEX`` carries the job index.

    Args:
        index: Index of the experiment; exported as the string ``INDEX``.
        test_split: Value for ``preprocess.test_split``.
        batch_size: Value for ``train.batch_size``.

    Returns:
        The ``subprocess.CompletedProcess`` of the ``sbatch`` invocation, so
        callers can inspect ``returncode``. A non-zero exit code is reported
        on stderr but does not raise, so remaining submissions still proceed.
    """
    import sys  # local import: only needed for the failure warning below

    # Extend (rather than replace) the current environment, otherwise PATH is
    # lost and sbatch cannot be found.
    env = {
        **os.environ,
        "EXP_PARAMS": f"-S preprocess.test_split={test_split} -S train.batch_size={batch_size}",
        "INDEX": str(index),
    }
    # Run sbatch through an explicit bash subprocess (otherwise module not found)
    result = subprocess.run(['/usr/bin/bash', '-c', 'sbatch batchjob.sh'], env=env)
    if result.returncode != 0:
        # Previously a failed submission went unnoticed; make it visible.
        print(
            f"sbatch submission for index {index} failed with exit code {result.returncode}",
            file=sys.stderr,
        )
    return result

if __name__ == "__main__":
    # Hyperparameter grid: every test split is paired with every batch size.
    test_split_list = [0.2, 0.3]
    batch_size_list = [2048, 4096]
    grid = itertools.product(test_split_list, batch_size_list)
    # Submit one batch job per combination, numbered consecutively from 0.
    for job_index, combination in enumerate(grid):
        split, batch = combination
        submit_batch_job(job_index, split, batch)
4 changes: 4 additions & 0 deletions global.env
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
TUSTU_PROJECT_NAME=ml-pipeline
TUSTU_DOCKERHUB_USERNAME=tustudio
TUSTU_LOGS_PATH=logs
TUSTU_TEMP_PATH=temp
Empty file added logs/slurm/.gitkeep
Empty file.
Empty file added logs/tensorboard/.gitkeep
Empty file.
Loading

0 comments on commit 05250aa

Please sign in to comment.