Skip to content

Commit

Permalink
Merge branch 'np_jw_test_illumina_genotyping_arrays' into jw_add_vari…
Browse files Browse the repository at this point in the history
…ant_calling_test
  • Loading branch information
jessicaway committed Jan 6, 2025
2 parents 339e0da + 5fbfd41 commit afe8d0b
Show file tree
Hide file tree
Showing 37 changed files with 1,721 additions and 507 deletions.
8 changes: 8 additions & 0 deletions .dockstore.yml
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,10 @@ workflows:
subclass: WDL
primaryDescriptorPath: /pipelines/skylab/atac/atac.wdl

- name: TestExomeGermlineSingleSample
subclass: WDL
primaryDescriptorPath: /verification/test-wdls/TestExomeGermlineSingleSample.wdl

- name: TestIlluminaGenotypingArray
subclass: WDL
primaryDescriptorPath: /verification/test-wdls/TestIlluminaGenotypingArray.wdl
Expand All @@ -139,6 +143,10 @@ workflows:
subclass: WDL
primaryDescriptorPath: /verification/test-wdls/TestMultiome.wdl

- name: TestReblockGVCF
subclass: WDL
primaryDescriptorPath: /verification/test-wdls/TestReblockGVCF.wdl

- name: Testsnm3C
subclass: WDL
primaryDescriptorPath: /verification/test-wdls/Testsnm3C.wdl
Expand Down
410 changes: 410 additions & 0 deletions .github/workflows/test_exome_germline_single_sample.yml

Large diffs are not rendered by default.

400 changes: 400 additions & 0 deletions .github/workflows/test_reblockGVCF.yml

Large diffs are not rendered by default.

14 changes: 7 additions & 7 deletions pipeline_versions.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,14 @@ ExternalExomeReprocessing 3.3.3 2024-11-04
CramToUnmappedBams 1.1.3 2024-08-02
WholeGenomeReprocessing 3.3.3 2024-11-04
ExomeReprocessing 3.3.3 2024-11-04
BuildIndices 3.0.0 2023-12-06
BuildIndices 3.1.0 2024-11-26
scATAC 1.3.2 2023-08-03
snm3C 4.0.4 2024-08-06
Multiome 5.9.1 2024-11-12
PairedTag 1.8.2 2024-11-12
Multiome 5.9.4 2024-12-05
PairedTag 1.9.0 2024-12-05
MultiSampleSmartSeq2 2.2.22 2024-09-11
MultiSampleSmartSeq2SingleNucleus 2.0.4 2024-11-12
Optimus 7.8.2 2024-11-12
atac 2.5.2 2024-11-12
MultiSampleSmartSeq2SingleNucleus 2.0.6 2024-11-15
Optimus 7.9.0 2024-12-05
atac 2.5.3 2024-11-22
SmartSeq2SingleSample 5.1.21 2024-09-11
SlideSeq 3.4.5 2024-11-12
SlideSeq 3.4.7 2024-12-3
5 changes: 5 additions & 0 deletions pipelines/skylab/atac/atac.changelog.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# 2.5.3
2024-11-22 (Date of Last Commit)

* Updated the warp-tools docker; this update changes the way gene_names are identified when creating gene expression h5ad files; does not impact ATAC workflow

# 2.5.2
2024-11-12 (Date of Last Commit)

Expand Down
4 changes: 2 additions & 2 deletions pipelines/skylab/atac/atac.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -49,15 +49,15 @@ workflow ATAC {
String adapter_seq_read3 = "TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG"
}

String pipeline_version = "2.5.2"
String pipeline_version = "2.5.3"

# Determine docker prefix based on cloud provider
String gcr_docker_prefix = "us.gcr.io/broad-gotc-prod/"
String acr_docker_prefix = "dsppipelinedev.azurecr.io/"
String docker_prefix = if cloud_provider == "gcp" then gcr_docker_prefix else acr_docker_prefix

# Docker image names
String warp_tools_2_2_0 = "warp-tools:2.2.0"
String warp_tools_2_2_0 = "warp-tools:2.5.0"
String cutadapt_docker = "cutadapt:1.0.0-4.4-1686752919"
String samtools_docker = "samtools-dist-bwa:3.0.0"
String upstools_docker = "upstools:1.0.0-2023.03.03-1704300311"
Expand Down
5 changes: 5 additions & 0 deletions pipelines/skylab/build_indices/BuildIndices.changelog.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# 3.1.0
2024-11-26 (Date of Last Commit)

* Added metadata.txt file as an output to the pipeline

# 3.0.0
2023-12-06 (Date of Last Commit)

Expand Down
73 changes: 72 additions & 1 deletion pipelines/skylab/build_indices/BuildIndices.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ workflow BuildIndices {
}

# version of this pipeline
String pipeline_version = "3.0.0"
String pipeline_version = "3.1.0"


parameter_meta {
Expand Down Expand Up @@ -49,12 +49,25 @@ workflow BuildIndices {
organism = organism
}

call RecordMetadata {
input:
pipeline_version = pipeline_version,
input_files = [annotations_gtf, genome_fa, biotypes],
output_files = [
BuildStarSingleNucleus.star_index,
BuildStarSingleNucleus.modified_annotation_gtf,
CalculateChromosomeSizes.chrom_sizes,
BuildBWAreference.reference_bundle
]
}

output {
File snSS2_star_index = BuildStarSingleNucleus.star_index
String pipeline_version_out = "BuildIndices_v~{pipeline_version}"
File snSS2_annotation_gtf_modified = BuildStarSingleNucleus.modified_annotation_gtf
File reference_bundle = BuildBWAreference.reference_bundle
File chromosome_sizes = CalculateChromosomeSizes.chrom_sizes
File metadata = RecordMetadata.metadata_file
}
}
Expand Down Expand Up @@ -195,3 +208,61 @@ String reference_name = "bwa-mem2-2.2.1-~{organism}-~{genome_source}-build-~{gen
}
}


# RecordMetadata
#
# Writes a plain-text provenance report (metadata.txt) for a workflow run.
# The report contains, in order:
#   * the pipeline version string passed in by the caller,
#   * the UTC timestamp at which this task's command ran,
#   * the path and md5sum of every file in `input_files`,
#   * the path and md5sum of every file in `output_files`,
#   * a workspace bucket, submission ID and workflow ID, taken from the 3rd,
#     5th and 7th '/'-separated fields of the path of `output_files[0]`.
#
# Inputs:
#   pipeline_version - version string recorded verbatim in the report
#   input_files      - files listed in the "Input Files" section
#   output_files     - files listed in the "Output Files" section; must be
#                      non-empty because element 0 is used to derive the IDs
#
# Output:
#   metadata_file    - the generated metadata.txt
task RecordMetadata {
  input {
    String pipeline_version
    Array[File] input_files
    Array[File] output_files
  }

  command <<<
    set -euo pipefail

    # create metadata file
    echo "Pipeline Version: ~{pipeline_version}" > metadata.txt
    echo "Date of Workflow Run: $(date -u +%Y-%m-%dT%H:%M:%SZ)" >> metadata.txt
    echo "" >> metadata.txt

    # echo paths and md5sums for input files
    # NOTE: the paths are joined with single spaces, so the loop relies on
    # shell word splitting; paths containing whitespace are not supported.
    echo "Input Files and their md5sums:" >> metadata.txt
    for file in ~{sep=" " input_files}; do
      echo "$file : $(md5sum "$file" | awk '{print $1}')" >> metadata.txt
    done
    echo "" >> metadata.txt

    # echo paths and md5sums for output files
    echo "Output Files and their md5sums:" >> metadata.txt
    for file in ~{sep=" " output_files}; do
      echo "$file : $(md5sum "$file" | awk '{print $1}')" >> metadata.txt
    done
    echo "" >> metadata.txt

    # The identifiers below are positional fields of the first output file's
    # path as seen inside the task.
    # NOTE(review): assumes the path layout is
    # <prefix>/<x>/<bucket>/<y>/<submission_id>/<workflow_name>/<workflow_id>/...
    # -- confirm against the execution backend's path layout.

    # grab workspace bucket
    file="~{output_files[0]}"
    workspace_bucket=$(echo "$file" | awk -F'/' '{print $3}')
    echo "Workspace Bucket: $workspace_bucket" >> metadata.txt

    # grab submission ID
    submission_id=$(echo "$file" | awk -F'/' '{print $5}')
    echo "Submission ID: $submission_id" >> metadata.txt

    # grab workflow ID
    workflow_id=$(echo "$file" | awk -F'/' '{print $7}')
    echo "Workflow ID: $workflow_id" >> metadata.txt

    echo "" >> metadata.txt
  >>>

  output {
    File metadata_file = "metadata.txt"
  }
  runtime {
    docker: "ubuntu:20.04"
    memory: "5 GiB"
    disks: "local-disk 100 HDD"
    cpu: "1"
  }
}

16 changes: 16 additions & 0 deletions pipelines/skylab/multiome/Multiome.changelog.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,19 @@
# 5.9.4
2024-12-05 (Date of Last Commit)

* Moved the optional CellBender task to the Optimus.wdl

# 5.9.3
2024-12-03 (Date of Last Commit)

* Fixed a bug in the StarSoloFastq task that caused the pipeline to not output a UniqueAndMult-Uniform.mtx when --soloMultiMappers Uniform was passed to STAR

# 5.9.2
2024-11-22 (Date of Last Commit)

* Added bam validation in the StarSoloFastq task; this does not affect the outputs of the pipeline
* Updated the warp-tools docker; this update changes the way gene_names are identified when creating gene expression h5ad files

# 5.9.1
2024-11-12 (Date of Last Commit)

Expand Down
57 changes: 12 additions & 45 deletions pipelines/skylab/multiome/Multiome.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@ version 1.0
import "../../../pipelines/skylab/atac/atac.wdl" as atac
import "../../../pipelines/skylab/optimus/Optimus.wdl" as optimus
import "../../../tasks/skylab/H5adUtils.wdl" as H5adUtils
import "../../../tasks/broad/Utilities.wdl" as utils
import "https://raw.githubusercontent.com/aawdeh/CellBender/aa-cbwithoutcuda/wdl/cellbender_remove_background_azure.wdl" as CellBender_no_cuda
import "https://raw.githubusercontent.com/broadinstitute/CellBender/v0.3.0/wdl/cellbender_remove_background.wdl" as CellBender
import "../../../tasks/broad/Utilities.wdl" as utils

workflow Multiome {

String pipeline_version = "5.9.1"
String pipeline_version = "5.9.4"

input {
String cloud_provider
Expand Down Expand Up @@ -103,7 +103,8 @@ workflow Multiome {
count_exons = count_exons,
soloMultiMappers = soloMultiMappers,
cloud_provider = cloud_provider,
gex_expected_cells = expected_cells
gex_expected_cells = expected_cells,
run_cellbender = run_cellbender
}

# Call the ATAC workflow
Expand Down Expand Up @@ -134,39 +135,6 @@ workflow Multiome {
atac_fragment = Atac.fragment_file
}

# Call CellBender
if (run_cellbender) {
if (cloud_provider == "gcp") {
call CellBender.run_cellbender_remove_background_gpu as CellBender {
input:
sample_name = input_id,
input_file_unfiltered = Optimus.h5ad_output_file,
hardware_boot_disk_size_GB = 20,
hardware_cpu_count = 4,
hardware_disk_size_GB = 50,
hardware_gpu_type = "nvidia-tesla-t4",
hardware_memory_GB = 32,
hardware_preemptible_tries = 2,
hardware_zones = "us-central1-a us-central1-c",
nvidia_driver_version = "470.82.01"
}
}
if (cloud_provider == "azure") {
call CellBender_no_cuda.run_cellbender_remove_background_gpu as CellBender_no_cuda {
input:
sample_name = input_id,
input_file_unfiltered = Optimus.h5ad_output_file,
hardware_boot_disk_size_GB = 20,
hardware_cpu_count = 4,
hardware_disk_size_GB = 50,
hardware_gpu_type = "nvidia-tesla-t4",
hardware_memory_GB = 32,
hardware_preemptible_tries = 2,
hardware_zones = "us-central1-a us-central1-c",
nvidia_driver_version = "470.82.01"
}
}
}

meta {
allowNestedInputs: true
Expand Down Expand Up @@ -201,15 +169,14 @@ workflow Multiome {
File? gex_aligner_metrics = Optimus.aligner_metrics
File? library_metrics = Optimus.library_metrics
File? mtx_files = Optimus.mtx_files
File? cell_barcodes_csv = Optimus.cell_barcodes_csv
File? checkpoint_file = Optimus.checkpoint_file
Array[File]? h5_array = Optimus.h5_array
Array[File]? html_report_array = Optimus.html_report_array
File? log = Optimus.log
Array[File]? metrics_csv_array = Optimus.metrics_csv_array
String? output_directory = Optimus.output_directory
File? summary_pdf = Optimus.summary_pdf

# cellbender outputs
File? cell_barcodes_csv = CellBender.cell_csv
File? checkpoint_file = CellBender.ckpt_file
Array[File]? h5_array = CellBender.h5_array
Array[File]? html_report_array = CellBender.report_array
File? log = CellBender.log
Array[File]? metrics_csv_array = CellBender.metrics_array
String? output_directory = CellBender.output_dir
File? summary_pdf = CellBender.pdf
}
}
16 changes: 16 additions & 0 deletions pipelines/skylab/optimus/Optimus.changelog.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,19 @@
# 7.9.0
2024-12-05 (Date of Last Commit)

* Added an optional task to the Optimus.wdl that will run CellBender on the Optimus output h5ad file

# 7.8.4
2024-12-03 (Date of Last Commit)

* Fixed a bug in the StarSoloFastq task that caused the pipeline to not output a UniqueAndMult-Uniform.mtx when --soloMultiMappers Uniform was passed to STAR

# 7.8.3
2024-11-22 (Date of Last Commit)

* Added bam validation in the StarSoloFastq task; this does not affect the outputs of the pipeline
* Updated the warp-tools docker; this update changes the way gene_names are identified when creating gene expression h5ad files

# 7.8.2
2024-11-12 (Date of Last Commit)

Expand Down
Loading

0 comments on commit afe8d0b

Please sign in to comment.