Skip to content

Commit

Permalink
Merge pull request #71 from phac-nml/INX/DBUpdate
Browse files Browse the repository at this point in the history
updated default databases to null
  • Loading branch information
mattheww95 authored May 2, 2024
2 parents af22d3c + 29dfc63 commit bb93c35
Show file tree
Hide file tree
Showing 19 changed files with 3,043 additions and 27 deletions.
11 changes: 11 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,17 @@
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## v0.1.2 - [2024-05-02]

### Added

### Changed

- Changed default values for database parameters `--dehosting_idx`, `--mash_sketch`, `--kraken2_db`, and `--bakta_db` to null.
- Enabled checking for existence of database files in JSON Schema to avoid issues with staging non-existent files in Azure.
- Set `--kraken2_db` to be a required parameter for the pipeline.
- Hide bakta parameters from IRIDA Next UI.

## v0.1.1 - [2024-04-22]

### Added
Expand Down
8 changes: 6 additions & 2 deletions conf/test.config
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,14 @@ params {

platform = "illumina"

mash.mash_sketch = "https://github.com/phac-nml/mikrokondo/raw/dev/tests/data/databases/campy-staph-ecoli.msh"
mash_sketch = "https://github.com/phac-nml/mikrokondo/raw/dev/tests/data/databases/campy-staph-ecoli.msh"
mash.mash_sketch = mash_sketch
mash.min_kmer = 1

r_contaminants.mega_mm2_idx = "https://github.com/phac-nml/mikrokondo/raw/dev/tests/data/databases/campy.mmi"
dehosting_idx = "https://github.com/phac-nml/mikrokondo/raw/dev/tests/data/databases/campy.mmi"
r_contaminants.mega_mm2_idx = dehosting_idx
kraken2_db = "${projectDir}/tests/data/kraken2/test"
kraken.db = kraken2_db

fastp.args.illumina = "-Q"
min_reads = 100
Expand Down
10 changes: 5 additions & 5 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,10 @@ params {


// Datasets
dehosting_idx = "./databases/PhiPacHum_m2.idx" // mm2 index
mash_sketch = "./databases/GTDBSketch_20231003.msh" // Make sure comments are formatted as taxonomic strings
bakta_db = "./databases/db-light"
kraken2_db = "./databases/k2_standard_20220607/"
dehosting_idx = null // mm2 index
mash_sketch = null // Make sure comments are formatted as taxonomic strings
bakta_db = null
kraken2_db = null
staramr_db = null // Recommended usage is to use the default database in the container


Expand Down Expand Up @@ -1026,7 +1026,7 @@ manifest {
description = """Mikrokondo beta"""
mainScript = 'main.nf'
nextflowVersion = '!>=23.04.0'
version = '0.1.1'
version = '0.1.2'
defaultBranch = 'main'
doi = ''
}
Expand Down
28 changes: 16 additions & 12 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -76,43 +76,46 @@
"properties": {
"dehosting_idx": {
"type": "string",
"default": "./databases/PhiPacHum_m2.idx",
            "description": "Minimap2 index for dehosting and kitome removal",
"pattern": "^\\S+$",
"exists": true,
"format": "file-path"
},
"mash_sketch": {
"type": "string",
"default": "./databases/GTDBSketch_20231003.msh",
            "description": "Mash sketch used for contamination detection and speciation (Sketch comments must be a taxonomic string similar to what Kraken2 outputs)",
"pattern": "^\\S+$",
"exists": true,
"format": "file-path"
},
"bakta_db": {
"kraken2_db": {
"type": "string",
"description": "Database use for bakta, this value is optional as bakta can be skipped",
"default": "./databases/db-light",
"description": "Kraken2 database",
"pattern": "^\\S+$",
"exists": true,
"format": "directory-path"
},
"kraken2_db": {
"bakta_db": {
"type": "string",
"default": "./databases/k2_standard_20220607/",
"description": "Kraken2 database",
            "description": "Database used for Bakta, this value is optional as Bakta can be skipped",
"pattern": "^\\S+$",
"format": "directory-path"
"exists": true,
"format": "directory-path",
"hidden": true
},
"staramr_db": {
"type": "string",
"description": "It is recommended to use the StarAMR database in the StarAMR container however, an external option can be specified",
"pattern": "^\\S+$",
"exists": true,
"format": "directory-path",
"hidden": true
}
},
"required": [
"dehosting_idx",
"mash_sketch"
"mash_sketch",
"kraken2_db"
],
"description": "The location of databases used by mikrokondo"
},
Expand Down Expand Up @@ -355,8 +358,9 @@
},
"skip_bakta": {
"type": "boolean",
"default": true,
"description": "Skip annotation with Bakta"
"default": true,
"description": "Skip annotation with Bakta",
"hidden": true
},
"skip_abricate": {
"type": "boolean",
Expand Down
2 changes: 1 addition & 1 deletion subworkflows/local/annotate_genomes.nf
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ workflow ANNOTATE_GENOMES {
txt = channel.empty()
abricate_report = channel.empty()

if(!params.skip_bakta){
if(!params.skip_bakta && params.bakta.db){
db_file = Channel.value("${params.bakta.db}")
annotated = BAKTA_ANNOTATE(contig_data, db_file,
[], [], [], [], [], []) // empty channels for optional arguments
Expand Down
4 changes: 2 additions & 2 deletions subworkflows/local/clean_reads.nf
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ workflow QC_READS {

// TODO add in code to check that there are always enough reads left over after decontamination
// TODO need to make sure that if one read is unmapped the other is not included as well
deconned_reads = REMOVE_CONTAMINANTS(reads, file(params.r_contaminants.mega_mm2_idx), Channel.value(platform_comp))
deconned_reads = REMOVE_CONTAMINANTS(reads, params.r_contaminants.mega_mm2_idx ? file(params.r_contaminants.mega_mm2_idx) : error("--dehosting_idx ${params.dehosting_idx} is invalid"), Channel.value(platform_comp))
versions = versions.mix(REMOVE_CONTAMINANTS.out.versions)


Expand Down Expand Up @@ -139,7 +139,7 @@ workflow QC_READS {
ch_prepped_reads = filtered_samples // put in un-downsampled reads
}

mash_screen_out = MASH_SCREEN(ch_prepped_reads, file(params.mash.mash_sketch))
mash_screen_out = MASH_SCREEN(ch_prepped_reads, params.mash.mash_sketch ? file(params.mash.mash_sketch) : error("--mash_sketch ${params.mash_sketch} is invalid"))

versions = versions.mix(mash_screen_out.versions)

Expand Down
4 changes: 2 additions & 2 deletions subworkflows/local/determine_species.nf
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ workflow DETERMINE_SPECIES {
versions = Channel.empty()
if (params.run_kraken){
log.info "Running kraken2 for contigs classification"
KRAKEN(contigs, file(params.kraken.db))
KRAKEN(contigs, params.kraken.db ? file(params.kraken.db) : error("--kraken2_db ${params.kraken.db} is invalid"))

// join contigs for classification
split_contigs = KRAKEN.out.classified_contigs.join(KRAKEN.out.report).join(KRAKEN.out.kraken_output)
Expand All @@ -40,7 +40,7 @@ workflow DETERMINE_SPECIES {

}else {
log.info "Using mash screen for sample classification"
MASH_SCREEN(contigs, file(params.mash.mash_sketch))
MASH_SCREEN(contigs, params.mash.mash_sketch ? file(params.mash.mash_sketch) : error("--mash_sketch ${params.mash_sketch} is invalid"))
results = results.mix(MASH_SCREEN.out.mash_data)

parsed = PARSE_MASH(MASH_SCREEN.out.mash_data, Channel.value("top"))
Expand Down
2 changes: 1 addition & 1 deletion subworkflows/local/split_metagenomic.nf
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ workflow SPLIT_METAGENOMIC {
contigs = contigs.map{
meta, contigs, reads -> tuple(meta, contigs)
}
kraken_out = KRAKEN(contigs, file(params.kraken.db))
    kraken_out = KRAKEN(contigs, params.kraken.db ? file(params.kraken.db) : error("--kraken2_db ${params.kraken2_db} is invalid"))
staged_kraken_data = kraken_out.classified_contigs.join(kraken_out.report).join(kraken_out.kraken_output)

binned_data = BIN_KRAKEN2(staged_kraken_data, Channel.value(params.kraken_bin.taxonomic_level))
Expand Down
1 change: 1 addition & 0 deletions tests/data/kraken2/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Data within this test is taken from the kraken2 test data set here: https://github.com/DerrickWood/kraken2/tree/master/data
76 changes: 76 additions & 0 deletions tests/data/kraken2/output.k2.cls.fa

Large diffs are not rendered by default.

Loading

0 comments on commit bb93c35

Please sign in to comment.