Patch summary (free text before the first "diff --git" header):
  * Removes the conda_env_check and Super-Linter workflows and .tests/test_megahit.sh.
  * Adds docker.yml, pr.yml and release.yml workflows; tests.yml gains lint and unit-test
    jobs and becomes callable via workflow_call.
  * Adds VERSION, .gitignore and one Dockerfile per conda environment.
  * Every rule gets a `container:` directive whose image tag is derived from VERSION.
  * get_assembly_ext_path() raises FileNotFoundError when the extension directory is missing.
  * NOTE(review): context-line indentation/blank lines were reconstructed; confirm against the tree.

diff --git a/.github/workflows/conda_env_check.yml b/.github/workflows/conda_env_check.yml
deleted file mode 100644
index 796fe6c..0000000
--- a/.github/workflows/conda_env_check.yml
+++ /dev/null
@@ -1,27 +0,0 @@
-name: Check Conda Envs
-
-on:
-  pull_request:
-    branches: [ main, master ]
-  push:
-    branches: [ main, master ]
-  workflow_dispatch:
-    inputs:
-      envs:
-        description: 'Regex for envs'
-        required: false
-        default: 'envs/'
-
-jobs:
-  check_conda_envs:
-    name: Check Conda Envs
-    runs-on: ubuntu-latest
-
-    steps:
-      - name: Checkout Code
-        uses: actions/checkout@v4
-
-      - name: Check Conda Envs
-        uses: Ulthran/conda_env_check@v1
-        with:
-          envs: "envs/"
diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
new file mode 100644
index 0000000..3e7164e
--- /dev/null
+++ b/.github/workflows/docker.yml
@@ -0,0 +1,60 @@
+name: Push to DockerHub
+
+on:
+  workflow_call:
+
+  workflow_dispatch:
+
+jobs:
+  build-and-push-to-dockerhub:
+    name: Push Docker image to Docker Hub
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Get sbx version
+        shell: bash
+        run: |
+          SBX_VER=$(cat VERSION)
+          echo "SBX_VER=$SBX_VER" >> $GITHUB_ENV
+
+      - name: Log in to Docker Hub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_TOKEN }}
+
+      - name: Extract metadata (tags, labels) for Docker
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: sunbeamlabs/sbx_assembly
+
+      - name: Build and push Docker image for annotation
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          file: envs/sbx_annotation.Dockerfile
+          push: true
+          tags: sunbeamlabs/sbx_assembly:${{ env.SBX_VER }}-annotation
+          labels: ${{ steps.meta.outputs.labels }}
+
+      - name: Build and push Docker image for assembly
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          file: envs/sbx_assembly.Dockerfile
+          push: true
+          tags: sunbeamlabs/sbx_assembly:${{ env.SBX_VER }}-assembly
+          labels: ${{ steps.meta.outputs.labels }}
+
+      - name: Build and push Docker image for coverage
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          file: envs/sbx_coverage.Dockerfile
+          push: true
+          tags: sunbeamlabs/sbx_assembly:${{ env.SBX_VER }}-coverage
+          labels: ${{ steps.meta.outputs.labels }}
\ No newline at end of file
diff --git a/.github/workflows/linter.yml b/.github/workflows/linter.yml
deleted file mode 100755
index 867df5a..0000000
--- a/.github/workflows/linter.yml
+++ /dev/null
@@ -1,25 +0,0 @@
-name: Super-Linter
-
-on:
-  pull_request:
-    branches: [ master, main ]
-  push:
-    branches: [ master, main ]
-
-jobs:
-  lint:
-    name: Lint Code Base
-    runs-on: ubuntu-latest
-
-    steps:
-      - name: Checkout Code
-        uses: actions/checkout@v3
-
-      - name: Lint Code Base
-        uses: github/super-linter@v4
-        env:
-          VALIDATE_ALL_CODEBASE: true
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-
-          VALIDATE_SNAKEMAKE_SNAKEFMT: true
-          VALIDATE_PYTHON_BLACK: true
\ No newline at end of file
diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml
new file mode 100644
index 0000000..ed35a2b
--- /dev/null
+++ b/.github/workflows/pr.yml
@@ -0,0 +1,12 @@
+name: Tests
+
+on:
+  pull_request:
+    branches:
+      - main
+
+jobs:
+  run-tests:
+    uses: ./.github/workflows/tests.yml
+    secrets: inherit
+    
\ No newline at end of file
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
new file mode 100644
index 0000000..2db6d23
--- /dev/null
+++ b/.github/workflows/release.yml
@@ -0,0 +1,79 @@
+name: Release
+
+on:
+  release:
+    types: [published]
+
+  workflow_dispatch:
+
+jobs:
+  run-tests:
+    uses: ./.github/workflows/tests.yml
+    secrets: inherit
+
+  check-version:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Get sbx version
+        shell: bash
+        run: |
+          SBX_VER=$(cat VERSION)
+          echo "SBX_VER=$SBX_VER" >> $GITHUB_ENV
+
+      - id: get_version
+        uses: battila7/get-version-action@v2
+
+      - name: Check version
+        shell: bash
+        run: |
+          RELEASE_VERSION=${{ steps.get_version.outputs.version-without-v }}
+          echo "Release version: ${RELEASE_VERSION}"
+          echo "Sbx version: ${{ env.SBX_VER }}"
+
+          if [[ $RELEASE_VERSION == ${{ env.SBX_VER }} ]]; then
+            echo "Versions match, continuing..."
+          else
+            echo "Versions don't match, exiting..."
+            exit 1
+          fi
+
+  push-to-dockerhub:
+    uses: ./.github/workflows/docker.yml
+    secrets: inherit
+    needs:
+      - run-tests
+      - check-version
+
+  test-apptainer:
+    name: Apptainer Test
+    runs-on: ubuntu-latest
+    needs: push-to-dockerhub
+
+    steps:
+      - name: Checkout Code
+        uses: actions/checkout@v4
+
+      - name: Set test env
+        run: echo "SUNBEAM_TEST_PROFILE=apptainer" >> $GITHUB_ENV
+
+      - uses: eWaterCycle/setup-apptainer@v2
+        with:
+          apptainer-version: 1.1.2
+
+      - name: Test with Sunbeam
+        uses: sunbeam-labs/sbx_test_action@v1
+        with:
+          test-directory: ".tests/e2e/"
+
+      - name: Dump Logs
+        shell: bash
+        if: always()
+        run: tail -n +1 logs/*
+
+      - name: Dump Stats
+        shell: bash
+        if: always()
+        run: cat stats/*
\ No newline at end of file
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 30cc75e..2df6a4c 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -1,31 +1,73 @@
 name: Tests
 
 on:
-  pull_request:
-    branches: [ master, main ]
   push:
     branches: [ master, main ]
+  workflow_call:
+  workflow_dispatch:
   schedule:
     - cron: "0 13 * * 1"
 
-jobs:
+jobs:
+  lint:
+    name: Lint Code
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout Code
+        uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - name: Install Dependencies
+        run: pip install black snakefmt
+
+      - name: Run Linter
+        run: |
+          black --check .
+          snakefmt --check *.smk
+
+  test-unit:
+    name: Run Extension Unit Tests
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout Code
+        uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - name: Install Dependencies
+        run: pip install pytest
+
+      - name: Run Unit Tests
+        run: true #pytest .tests/unit/
+        # This'll require having a lib within scripts with internal tests
+
   test-e2e:
     name: Test Extension with Sunbeam
     runs-on: ubuntu-latest
+    needs:
+      - test-unit
+      - lint
 
     steps:
       - name: Checkout Code
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
 
       - name: Test with Sunbeam
         uses: sunbeam-labs/sbx_test_action@v1
-
-      - name: Dump Logs
-        shell: bash
-        if: always()
-        run: tail -n +1 logs/*
+
+      #- name: Dump Logs
+      #  shell: bash
+      #  if: always()
+      #  run: tail -n +1 logs/*
 
-      - name: Dump Stats
-        shell: bash
-        if: always()
-        run: cat stats/*
+      #- name: Dump Stats
+      #  shell: bash
+      #  if: always()
+      #  run: cat stats/*
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..ed8ebf5
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+__pycache__
\ No newline at end of file
diff --git a/.tests/test_megahit.sh b/.tests/test_megahit.sh
deleted file mode 100755
index f03d6f2..0000000
--- a/.tests/test_megahit.sh
+++ /dev/null
@@ -1,33 +0,0 @@
-# Fix for #167:
-# Check that if megahit gives a nonzero exit code it is handled appropriately.
-# The two main cases are 255 (empty contigs) and anything else nonzero
-# (presumed to be memory-related in the assembly rules).
-# Checking for successful behavior is already handled in test_all.
-function test_assembly_failures {
-    # Up to just before the assembly rules, things should work fine.
-    sunbeam run --profile $TEMPDIR/ all_decontam --configfile=$TEMPDIR/tmp_config.yml
-    # Remove previous assembly files, if they exist.
-    rm -rf $TEMPDIR/sunbeam_output/assembly
-
-    # If megahit exits with 255, it implies no contigs were built.
-#    mkdir -p "$TEMPDIR/megahit_255"
-#    echo -e '#!/usr/bin/env bash\nexit 255' > $TEMPDIR/megahit_255/megahit
-#    chmod +x $TEMPDIR/megahit_255/megahit
-#    (
-#        export PATH="$TEMPDIR/megahit_255:$PATH"
-#        txt=$(sunbeam run -- --configfile=$TEMPDIR/tmp_config.yml -p all_assembly)
-#        echo "$txt" > /mnt/d/Penn/sunbeam/log.txt
-#        echo "$txt" | grep "Empty contigs"
-#    )
-
-    # If megahit gives an exit code != 0 and != 255 it is an error.
-    mkdir -p "$TEMPDIR/megahit_137"
-    echo -e '#!/usr/bin/env bash\nexit 137' > $TEMPDIR/megahit_137/megahit
-    chmod +x $TEMPDIR/megahit_137/megahit
-    (
-        export PATH="$TEMPDIR/megahit_137:$PATH"
-        # (This command should *not* exit successfully.)
-        ! txt=$(sunbeam run --profile $TEMPDIR/ all_assembly --configfile=$TEMPDIR/tmp_config.yml)
-        echo "$txt" | grep "Check your memory"
-    )
-}
\ No newline at end of file
diff --git a/README.md b/README.md
index 50fc21b..f7fbb7b 100644
--- a/README.md
+++ b/README.md
@@ -4,7 +4,7 @@
 
 
 [![Tests](https://github.com/sunbeam-labs/sbx_assembly/actions/workflows/tests.yml/badge.svg)](https://github.com/sunbeam-labs/sbx_assembly/actions/workflows/tests.yml)
-[![Super-Linter](https://github.com/sunbeam-labs/sbx_assembly/actions/workflows/linter.yml/badge.svg)](https://github.com/sunbeam-labs/sbx_assembly/actions/workflows/linter.yml)
+[![DockerHub](https://img.shields.io/docker/pulls/sunbeamlabs/sbx_assembly)](https://hub.docker.com/repository/docker/sunbeamlabs/sbx_assembly/)
 
 
 A [Sunbeam](https://github.com/sunbeam-labs/sunbeam) extension for assembly of contigs using Megahit, gene annotation using Prodigal, and annotation using [Blast](https://blast.ncbi.nlm.nih.gov/Blast.cgi) and [Diamond](https://github.com/bbuchfink/diamond). It can also map reads to contigs and calculat per-base coverage using [Minimap2](https://github.com/lh3/minimap2) and [samtools](https://github.com/samtools/samtools).
diff --git a/VERSION b/VERSION
new file mode 100644
index 0000000..bd52db8
--- /dev/null
+++ b/VERSION
@@ -0,0 +1 @@
+0.0.0
\ No newline at end of file
diff --git a/envs/sbx_annotation.Dockerfile b/envs/sbx_annotation.Dockerfile
new file mode 100644
index 0000000..1011efe
--- /dev/null
+++ b/envs/sbx_annotation.Dockerfile
@@ -0,0 +1,17 @@
+FROM condaforge/mambaforge:latest
+
+# Setup
+WORKDIR /home/sbx_assembly_env
+
+COPY envs/sbx_annotation.yml ./
+
+# Install environment
+RUN mamba env create --file sbx_annotation.yml --name sbx_annotation
+
+ENV PATH="/opt/conda/envs/sbx_annotation/bin/:${PATH}"
+
+# "Activate" the environment
+SHELL ["conda", "run", "-n", "sbx_annotation", "/bin/bash", "-c"]
+
+# Run
+CMD "bash"
\ No newline at end of file
diff --git a/envs/sbx_assembly.Dockerfile b/envs/sbx_assembly.Dockerfile
new file mode 100644
index 0000000..9012d53
--- /dev/null
+++ b/envs/sbx_assembly.Dockerfile
@@ -0,0 +1,17 @@
+FROM condaforge/mambaforge:latest
+
+# Setup
+WORKDIR /home/sbx_assembly_env
+
+COPY envs/sbx_assembly.yml ./
+
+# Install environment
+RUN mamba env create --file sbx_assembly.yml --name sbx_assembly
+
+ENV PATH="/opt/conda/envs/sbx_assembly/bin/:${PATH}"
+
+# "Activate" the environment
+SHELL ["conda", "run", "-n", "sbx_assembly", "/bin/bash", "-c"]
+
+# Run
+CMD "bash"
\ No newline at end of file
diff --git a/envs/sbx_coverage.Dockerfile b/envs/sbx_coverage.Dockerfile
new file mode 100644
index 0000000..07b765c
--- /dev/null
+++ b/envs/sbx_coverage.Dockerfile
@@ -0,0 +1,17 @@
+FROM condaforge/mambaforge:latest
+
+# Setup
+WORKDIR /home/sbx_assembly_env
+
+COPY envs/sbx_coverage.yml ./
+
+# Install environment
+RUN mamba env create --file sbx_coverage.yml --name sbx_coverage
+
+ENV PATH="/opt/conda/envs/sbx_coverage/bin/:${PATH}"
+
+# "Activate" the environment
+SHELL ["conda", "run", "-n", "sbx_coverage", "/bin/bash", "-c"]
+
+# Run
+CMD "bash"
\ No newline at end of file
diff --git a/envs/sbx_coverage.yml b/envs/sbx_coverage.yml
index d552836..72db8e4 100644
--- a/envs/sbx_coverage.yml
+++ b/envs/sbx_coverage.yml
@@ -6,4 +6,4 @@ dependencies:
   - minimap2
   - numpy
   - samtools=1.9
-  - python>3
\ No newline at end of file
+  - python=3
\ No newline at end of file
diff --git a/sbx_annotation.smk b/sbx_annotation.smk
index 402d6c8..49804b6 100644
--- a/sbx_annotation.smk
+++ b/sbx_annotation.smk
@@ -9,6 +9,18 @@ from sunbeamlib.config import makepath, verify
 TARGET_ANNOTATE = ANNOTATION_FP / "all_samples.tsv"
 
 
+def get_assembly_ext_path() -> Path:
+    ext_path = Path(sunbeam_dir) / "extensions" / "sbx_assembly"
+    if ext_path.exists():
+        return ext_path
+    raise FileNotFoundError(
+        "Filepath for assembly not found, are you sure it's installed under extensions/sbx_assembly?"
+    )
+
+
+SBX_ASSEMBLY_VERSION = open(get_assembly_ext_path() / "VERSION").read().strip()
+
+
 try:
     BENCHMARK_FP
 except NameError:
@@ -55,6 +67,8 @@ rule build_diamond_db:
         LOG_FP / "build_diamond_db.log",
     conda:
         "envs/sbx_annotation.yml"
+    container:
+        f"docker://sunbeamlabs/sbx_assembly:{SBX_ASSEMBLY_VERSION}-annotation"
     shell:
         """
         diamond makedb --in {input} -d {input} 2>&1 | tee {log}
@@ -76,6 +90,8 @@ rule run_blastn:
     threads: Cfg["sbx_annotation"]["threads"]
     conda:
         "envs/sbx_annotation.yml"
+    container:
+        f"docker://sunbeamlabs/sbx_assembly:{SBX_ASSEMBLY_VERSION}-annotation"
     shell:
         """
         blastn \
@@ -104,6 +120,8 @@ rule run_diamond_blastp:
     threads: Cfg["sbx_annotation"]["threads"]
     conda:
         "envs/sbx_annotation.yml"
+    container:
+        f"docker://sunbeamlabs/sbx_assembly:{SBX_ASSEMBLY_VERSION}-annotation"
     shell:
         """
         if [ -s {input.genes} ]; then
@@ -137,6 +155,8 @@ rule run_diamond_blastx:
     threads: Cfg["sbx_annotation"]["threads"]
     conda:
         "envs/sbx_annotation.yml"
+    container:
+        f"docker://sunbeamlabs/sbx_assembly:{SBX_ASSEMBLY_VERSION}-annotation"
     shell:
         """
         if [ -s {input.genes} ]; then
@@ -167,6 +187,8 @@ rule blast_report:
         ANNOTATION_FP / "{blast_prog}" / "{db}" / "{query}" / "report.tsv",
     conda:
         "envs/sbx_annotation.yml"
+    container:
+        f"docker://sunbeamlabs/sbx_assembly:{SBX_ASSEMBLY_VERSION}-annotation"
     script:
         "scripts/blast_report.py"
 
@@ -213,6 +235,8 @@ rule aggregate_results:
         prot=Blastdbs["prot"],
     conda:
         "envs/sbx_annotation.yml"
+    container:
+        f"docker://sunbeamlabs/sbx_assembly:{SBX_ASSEMBLY_VERSION}-annotation"
     script:
         "scripts/aggregate_results.py"
 
diff --git a/sbx_assembly.smk b/sbx_assembly.smk
index a52ec35..ee68b3a 100644
--- a/sbx_assembly.smk
+++ b/sbx_assembly.smk
@@ -15,6 +15,18 @@ TARGET_ASSEMBLY = [
 ]
 
 
+def get_assembly_ext_path() -> Path:
+    ext_path = Path(sunbeam_dir) / "extensions" / "sbx_assembly"
+    if ext_path.exists():
+        return ext_path
+    raise FileNotFoundError(
+        "Filepath for assembly not found, are you sure it's installed under extensions/sbx_assembly?"
+    )
+
+
+SBX_ASSEMBLY_VERSION = open(get_assembly_ext_path() / "VERSION").read().strip()
+
+
 try:
     BENCHMARK_FP
 except NameError:
@@ -49,6 +61,8 @@ rule megahit_paired:
     threads: 4
     conda:
         "envs/sbx_assembly.yml"
+    container:
+        f"docker://sunbeamlabs/sbx_assembly:{SBX_ASSEMBLY_VERSION}-assembly"
     shell:
         """
         ## turn off bash strict mode
@@ -88,6 +102,8 @@ rule megahit_unpaired:
     threads: 4
     conda:
         "envs/sbx_assembly.yml"
+    container:
+        f"docker://sunbeamlabs/sbx_assembly:{SBX_ASSEMBLY_VERSION}-assembly"
     shell:
         """
         ## turn off bash strict mode
@@ -146,6 +162,8 @@ rule prodigal:
         LOG_FP / "prodigal_{sample}.log",
     conda:
         "envs/sbx_assembly.yml"
+    container:
+        f"docker://sunbeamlabs/sbx_assembly:{SBX_ASSEMBLY_VERSION}-assembly"
     shell:
         """
         if [[ -s {input} ]]; then
diff --git a/sbx_coverage.smk b/sbx_coverage.smk
index fbab43d..9360cbe 100644
--- a/sbx_coverage.smk
+++ b/sbx_coverage.smk
@@ -4,6 +4,18 @@
 #
 # Requires Minimap2 and samtools.
 
+
+def get_assembly_ext_path() -> Path:
+    ext_path = Path(sunbeam_dir) / "extensions" / "sbx_assembly"
+    if ext_path.exists():
+        return ext_path
+    raise FileNotFoundError(
+        "Filepath for assembly not found, are you sure it's installed under extensions/sbx_assembly?"
+    )
+
+
+SBX_ASSEMBLY_VERSION = open(get_assembly_ext_path() / "VERSION").read().strip()
+
 try:
     BENCHMARK_FP
 except NameError:
@@ -48,6 +60,8 @@ rule minimap_alignment:
     threads: Cfg["sbx_coverage"]["threads"]
     conda:
         "envs/sbx_coverage.yml"
+    container:
+        f"docker://sunbeamlabs/sbx_assembly:{SBX_ASSEMBLY_VERSION}-coverage"
     shell:
         """
         minimap2 -ax sr -t {threads} {input.contig} {input.reads} 1> {output} 2> {log}
@@ -66,6 +80,8 @@ rule contigs_sort:
     threads: Cfg["sbx_coverage"]["threads"]
     conda:
         "envs/sbx_coverage.yml"
+    container:
+        f"docker://sunbeamlabs/sbx_assembly:{SBX_ASSEMBLY_VERSION}-coverage"
     shell:
         """
         samtools sort -@ {threads} -o {output} {input} 2>&1 | tee {log}
@@ -83,6 +99,8 @@ rule mapping_depth:
         LOG_FP / "mapping_depth_{sample}.log",
     conda:
         "envs/sbx_coverage.yml"
+    container:
+        f"docker://sunbeamlabs/sbx_assembly:{SBX_ASSEMBLY_VERSION}-coverage"
     shell:
         """
         samtools depth -aa {input} 1> {output} 2> {log}
@@ -100,6 +118,8 @@ rule get_coverage:
         LOG_FP / "get_coverage_{sample}.log",
     conda:
         "envs/sbx_coverage.yml"
+    container:
+        f"docker://sunbeamlabs/sbx_assembly:{SBX_ASSEMBLY_VERSION}-coverage"
     script:
         "scripts/get_coverage.py"
 