Skip to content

Commit

Permalink
Merge pull request #92 from phac-nml/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
mattheww95 authored Jul 4, 2024
2 parents df48c71 + 322075c commit 31c494a
Show file tree
Hide file tree
Showing 10 changed files with 126 additions and 64 deletions.
24 changes: 24 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,29 @@
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.3.0] - 2024-07-04

### `Changed`

- Reformatted QCSummary fields and added a QCMessage field containing the old summary message. See [PR 85](https://github.com/phac-nml/mikrokondo/pull/85)

- Changed default Python3 image to use the StarAMR image. See [PR 90](https://github.com/phac-nml/mikrokondo/pull/90)

- Stripped the identifier from the taxonomic identification string. See [PR 90](https://github.com/phac-nml/mikrokondo/pull/90)

- Removed retry logic from processes and switched them to ignore. See [PR 91](https://github.com/phac-nml/mikrokondo/pull/91)

### `Fixed`

- Updated samtools/minimap2 container fixing CI issues and issues running the pipeline with Docker. See [PR 85](https://github.com/phac-nml/mikrokondo/pull/85)

- Removed task.maxRetries from error handling to prevent a StackOverflow error. See [PR 91](https://github.com/phac-nml/mikrokondo/pull/91)

### `Added`

- Altered the name of the stored `SpeciesTopHit` field in `irida_next.config`, and added a field displaying the identification method used. See [PR 90](https://github.com/phac-nml/mikrokondo/pull/90)


## [0.2.1] - 2024-06-03

### `Fixed`
Expand Down Expand Up @@ -87,6 +110,7 @@ Initial release of phac-nml/mikrokondo. Mikrokondo currently supports: read trim

- Added integration testing using [nf-test](https://www.nf-test.com/).

[0.3.0]: https://github.com/phac-nml/mikrokondo/releases/tag/0.3.0
[0.2.1]: https://github.com/phac-nml/mikrokondo/releases/tag/0.2.1
[0.2.0]: https://github.com/phac-nml/mikrokondo/releases/tag/0.2.0
[0.1.2]: https://github.com/phac-nml/mikrokondo/releases/tag/0.1.2
Expand Down
8 changes: 5 additions & 3 deletions bin/report_summaries.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
Matthew Wells: 2023-09-22
"""
from dataclasses import dataclass
from typing import Dict, Union
from typing import Dict, Optional
from collections import defaultdict
import os
import argparse
Expand All @@ -22,14 +22,16 @@ class CleaningInfo:
trim_field int: when split on a delimiter which section of the list to keep
"""
field: str
keep: Union[str, None] = None
trim_field: Union[int, None] = None
keep: Optional[str] = None
trim_field: Optional[int] = None

class JsonImport:
"""Intake json report to convert to CSV"""

__key_order = {v.field: v for v in [CleaningInfo(field="QCStatus"),
CleaningInfo(field="QCSummary"),
CleaningInfo(field="QCParameterSelection"),
CleaningInfo(field="QCMessage"),
CleaningInfo(field="QualityAnalysis", keep="message", trim_field=1),
CleaningInfo(field="meta")]}
__keep_keys = frozenset(__key_order.keys())
Expand Down
6 changes: 4 additions & 2 deletions conf/irida_next.config
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@ iridanext {
"QualityAnalysis.nr_contigs.value" : "nr contigs Value",
"QCSummary" : "QC Summary",
"meta.downsampled" : "Downsampled",
"SpeciesTopHit" : "Species",
"SpeciesTopHit" : "predicted_identification_name",
"IdentificationMethod" : "predicted_identification_method",
"ECTyperSubtyping.0.Database" : "ECTyper Database",
"ECTyperSubtyping.0.Evidence" : "ECTyper Evidence",
"ECTyperSubtyping.0.GeneCoverages(%)" : "ECTyper GeneCoverages (%)",
Expand Down Expand Up @@ -128,6 +129,7 @@ iridanext {
]
keep = [
"QCStatus",
"QCSummary",
"QualityAnalysis.checkm_contamination.qc_status",
"QualityAnalysis.checkm_contamination.value",
"QualityAnalysis.average_coverage.qc_status",
Expand All @@ -140,7 +142,7 @@ iridanext {
"QualityAnalysis.length.value",
"QualityAnalysis.nr_contigs.qc_status",
"QualityAnalysis.nr_contigs.value",
"QCSummary",
"IdentificationMethod",
"meta.downsampled",
"SpeciesTopHit",
"ECTyperSubtyping.0.Database",
Expand Down
21 changes: 13 additions & 8 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,8 @@ process {
ext.parameters = params.quast
stageInMode = params.stage_in_mode
// scratch = false
errorStrategy = { task.attempt <= task.maxRetries ? sleep(Math.pow(2, task.attempt) * 200 as long) && 'retry' : 'ignore' }
//errorStrategy = { sleep(Math.pow(2, task.attempt) * 200 as long); return 'retry' }
errorStrategy = 'ignore'
maxForks = 10 // Quast can get overloaded by job subs, so needs to be limited
publishDir = [
[
Expand All @@ -356,7 +357,8 @@ process {
//container = params.checkm.container
ext.parameters = params.checkm
stageInMode = params.stage_in_mode
errorStrategy = { task.attempt <= task.maxRetries ? sleep(Math.pow(2, task.attempt) * 200 as long) && 'retry' : 'finish' }
//errorStrategy = { sleep(Math.pow(2, task.attempt) * 200 as long); return 'retry' }
errorStrategy = 'ignore'
// scratch = false
publishDir = [
[
Expand All @@ -373,7 +375,8 @@ process {
ext.args = ""
//container = params.bandage.container
ext.parameters = params.bandage
errorStrategy = { task.attempt <= task.maxRetries ? sleep(Math.pow(2, task.attempt) * 200 as long) && 'retry' : 'ignore' }
//errorStrategy = { sleep(Math.pow(2, task.attempt) * 200 as long); return 'retry' }
errorStrategy = 'ignore'
stageInMode = params.stage_in_mode
// scratch = false
publishDir = [
Expand Down Expand Up @@ -523,8 +526,8 @@ process {

withName: MASH_SCREEN {
def dir_out = null
errorStrategy = { sleep(Math.pow(2, task.attempt) * 200 as long)
return 'retry' }
//errorStrategy = { sleep(Math.pow(2, task.attempt) * 200 as long); return 'retry' }
errorStrategy = 'ignore'
maxForks = 20
maxErrors = 3
ext.args = "-w"
Expand Down Expand Up @@ -567,7 +570,8 @@ process {
}

withName: FLYE_ASSEMBLE {
errorStrategy = { task.exitStatus in [140] ? 'retry' : 'ignore'}
//errorStrategy = { task.exitStatus in [140] ? 'retry' : 'ignore'}
errorStrategy = 'ignore'
//container = params.flye.container
ext.parameters = params.flye
ext.args = params.flye.args
Expand Down Expand Up @@ -623,8 +627,8 @@ process {
maxRetries = 3
ext.args = ""
ext.parameters = params.spades
//errorStrategy = { sleep(Math.pow(2, task.attempt) * 200 as long); return 'retry' }
errorStrategy = 'ignore'
//errorStrategy = { task.attempt <= task.maxRetries && sleep(Math.pow(2, task.attempt) * 200 as long) ? 'retry' : 'ignore' }
// scratch = false
publishDir = [
[
Expand Down Expand Up @@ -787,7 +791,8 @@ process {
// scratch = false
ext.parameters = params.pilon_iterative
maxRetries = 3
errorStrategy = { task.attempt <= task.maxRetries ? sleep(Math.pow(2, task.attempt) * 200 as long) && 'retry' : 'ignore' }
//errorStrategy = { sleep(Math.pow(2, task.attempt) * 200 as long); return 'retry' }
errorStrategy = 'ignore'
publishDir = [
[
path: { ["${task.assembly_polishing_directory_name}", "Pilon", "Fasta"].join(File.separator) },
Expand Down
44 changes: 28 additions & 16 deletions modules/local/report.nf
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ process REPORT{
def data_stride = 3 // report values added in groups of three, e.g sample meta info, parameters, output file of interest
def headers_list = 'headers' // ! TODO this string exists twice, need to fix that
def arr_size = test_in.size()
def qc_species_tag = "QCParameterSelection"
for(long i = 0; i < arr_size; i=i+data_stride){
def meta_data = test_in[i]
def report_tag = test_in[i+1]
Expand Down Expand Up @@ -76,8 +77,8 @@ process REPORT{
def search_phrases = qc_params_species()

// Add in quality information in place
generate_qc_data(sample_data, search_phrases)
create_action_call(sample_data)
generate_qc_data(sample_data, search_phrases, qc_species_tag)
create_action_call(sample_data, qc_species_tag)


def json_converted_data = new JsonBuilder(sample_data).toPrettyString()
Expand Down Expand Up @@ -124,6 +125,7 @@ def generate_coverage_data(sample_data, bp_field, species){
// Add fixed genome coverage for species if desired
def species_data_pos = 1;
if(base_counts_p
&& species[species_data_pos] != null
&& species[species_data_pos].containsKey("fixed_genome_size")
&& species[species_data_pos].fixed_genome_size != null){

Expand Down Expand Up @@ -197,7 +199,7 @@ def populate_qual_message(qual_data){
}

// Action: Reisolate and resequence, resequence, all good.
def create_action_call(sample_data){
def create_action_call(sample_data, species_tag){
/*Define criteria used to create base sketches
TODO Need to test a fallthrough sample (e.g. unspeciated) to see what happens
Expand All @@ -224,6 +226,9 @@ def create_action_call(sample_data){
TODO creating a logic heavy function that needs to be refactored
To address the defect, the passed and failed messages have been broken up; all that remains is to have the
final summary, checks passed and checks failed
*/

for(val in sample_data){
Expand All @@ -245,8 +250,7 @@ def create_action_call(sample_data){
final_message = "[FAILED] Sample was determined to be metagenomic, and this was not specied as" +
" a metagenomic run indicating contamination REISOLATION AND RESEQUENCING RECOMMENDED." +
"There is additionally a possibility that your sample could not be identified as it is novel and " +
"not included in the mash sketch provided to the pipeline (however this would be very rare), "+
"but if this is the case please disregard this message."
"not included in the program used to taxonomically classify your pipeline (however this is an unlikely culprit)."
}
sample_data[val.key]["QCStatus"] = sample_status
sample_data[val.key]["QCSummary"] = final_message
Expand Down Expand Up @@ -279,7 +283,6 @@ def create_action_call(sample_data){
if(!meta_data.assembly){
// We should have reads as we assembled it
if(qual_data && qual_data.containsKey("raw_average_quality") && !qual_data.raw_average_quality.status){
//qual_message.add(params.QCReportFields.raw_average_quality.low_msg)
resequence += 1
checks_failed += 1
}else if (qual_data && (!qual_data.containsKey("raw_average_quality") || !qual_data.raw_average_quality.status)){
Expand All @@ -290,7 +293,7 @@ def create_action_call(sample_data){
checks += 1

if(qual_data && qual_data.containsKey("average_coverage") && !qual_data.average_coverage.status){
//qual_message.add(params.QCReportFields.average_coverage.low_msg)

if(meta_data.downsampled){
qual_message.add("The sample may have been downsampled too aggressively, if this is the cause please re-run sample with a different target depth.")
}
Expand Down Expand Up @@ -341,7 +344,6 @@ def create_action_call(sample_data){
checks += 1



(reisolate, resequence) = n50_nrcontigs_decision(qual_data, nr_contigs_failed, n50_failed, qual_message, reisolate, resequence)
//qual_message.add("Quality Conclusion")

Expand All @@ -364,17 +366,26 @@ def create_action_call(sample_data){
qual_message.add("[PASSED] All Checks passed")
sample_status = "PASSED"
}
qual_message.add("Passed Tests: ${checks - checks_failed - checks_ignored}/${checks}")

def organism_criteria = sample_data[val.key][species_tag]
def tests_passed = "Passed Tests: ${checks - checks_failed - checks_ignored}/${checks}"
qual_message.add(tests_passed)

qual_message.add("Species ID: ${val.value[val.key][params.top_hit_species.report_tag]}")
def species_id = "Species ID: ${val.value[val.key][params.top_hit_species.report_tag]}"
qual_message.add(species_id)

// Qual summary not final message
final_message = qual_message.join("\n")
def terminal_message = populate_qual_message(qual_data).join("\n")
log.info "\n$val.key\n${terminal_message}\n${sample_status}\n${final_message}"

// Reseq recommended should go to a separate field
// Requested output should be: [PASS|FAILED] Species ID: [species] [Tests passed] [Organism criteria available]
qc_message = "${sample_status} ${species_id}; ${tests_passed}; Organism QC Criteria: ${organism_criteria}"

sample_data[val.key]["QCSummary"] = qc_message
sample_data[val.key]["QCStatus"] = sample_status
sample_data[val.key]["QCSummary"] = final_message
sample_data[val.key]["QCMessage"] = final_message
}

}
Expand Down Expand Up @@ -604,7 +615,8 @@ def get_species(value, search_phrases, shortest_token){
shortest_token: contains values to scrub from value to be searched for
*/

def qc_data = null;

def qc_data = [params.QCReport.fallthrough.search, params.QCReport.fallthrough];
if(value == null){
return qc_data
}
Expand All @@ -626,7 +638,6 @@ def get_species(value, search_phrases, shortest_token){
def get_qc_data_species(value_data, qc_data){
def quality_messages = [:]


params.QCReportFields.each{
k, v ->
if(v.on){ // only use the fields specified in the config
Expand All @@ -649,7 +660,7 @@ def get_qc_data_species(value_data, qc_data){
return quality_messages;
}

def generate_qc_data(data, search_phrases){
def generate_qc_data(data, search_phrases, qc_species_tag){
/*
data: sample data in a LazyMap
search_phrases: normalized search phrases from the nextflow.config
Expand All @@ -659,12 +670,13 @@ def generate_qc_data(data, search_phrases){
def top_hit_tag = params.top_hit_species.report_tag;
def quality_analysis = "QualityAnalysis"
def shortest_token = get_shortest_token(search_phrases)
def species_tag_location = 0
for(k in data){
if(!k.value.meta.metagenomic){
def species = get_species(k.value[k.key][top_hit_tag], search_phrases, shortest_token)
//generate_coverage_data(data[k.key], params.seqtk_size.report_tag, species) // update coverage first so its values can be used in generating qc messages
generate_coverage_data(data[k.key], params.coverage_calc_fields.bp_field, species) // update coverage first so its values can be used in generating qc messages
data[k.key][quality_analysis] = get_qc_data_species(k.value[k.key], species)
data[k.key][qc_species_tag] = species[species_tag_location]
}else{
data[k.key][quality_analysis] = ["Metagenomic": ["message": null, "status": false]]
data[k.key][quality_analysis]["Metagenomic"].message = "The sample was determined to be metagenomic, summary metrics will not be generated" +
Expand Down Expand Up @@ -865,7 +877,7 @@ def table_values(file_path, header_p, seperator, headers=null){
}
}

return rows_list.indexed().collectEntries { idx, row ->
return rows_list.indexed().collectEntries { idx, row ->
[(idx): row.collectEntries { k, v -> [(k): replace_missing(v)] }]
}
}
16 changes: 8 additions & 8 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ params {
show_hidden_params = false
validationS3PathCheck = true
validationShowHiddenParams = false
validationSchemaIgnoreParams = 'abricate,locidex,assembly_status,bakta,bandage,checkm,chopper,contigs_too_short,coreutils,coverage_calc_fields,ectyper,fastp,fastqc,filtered_reads,flye,kat,kleborate,kraken,kraken_bin,kraken_species,lissero,mash,mash_meta,medaka,minimap2,mlst,mobsuite_recon,opt_platforms,pilon,pilon_iterative,pointfinder_db_tag,python3,QCReport,QCReport-fields,QCReportFields,quast,racon,raw_reads,report_aggregate,r_contaminants,samtools,seqkit,seqtk,seqtk_size,shigeifinder,sistr,spades,spatyper,staramr,subtyping_report,top_hit_species,unicycler'
validationSchemaIgnoreParams = 'top_hit_method,abricate,locidex,assembly_status,bakta,bandage,checkm,chopper,contigs_too_short,coreutils,coverage_calc_fields,ectyper,fastp,fastqc,filtered_reads,flye,kat,kleborate,kraken,kraken_bin,kraken_species,lissero,mash,mash_meta,medaka,minimap2,mlst,mobsuite_recon,opt_platforms,pilon,pilon_iterative,pointfinder_db_tag,python3,QCReport,QCReport-fields,QCReportFields,quast,racon,raw_reads,report_aggregate,r_contaminants,samtools,seqkit,seqtk,seqtk_size,shigeifinder,sistr,spades,spatyper,staramr,subtyping_report,top_hit_species,unicycler'
validationFailUnrecognisedParams = false // for the qcreport fields

// SKIP options
Expand Down Expand Up @@ -193,8 +193,8 @@ params {

// Python container, May switch for pypy3
python3 {
singularity = "docker.io/python:3.11.6"
docker = "docker.io/python:3.11.6"
singularity = "quay.io/biocontainers/staramr:0.10.0--pyhdfd78af_0"
docker = "quay.io/biocontainers/staramr:0.10.0--pyhdfd78af_0"
}

seqtk {
Expand Down Expand Up @@ -424,14 +424,14 @@ params {
report_tag = "SpeciesTopHit"
}

kraken_species {
report_tag = "Kraken2TopHit"
top_hit_method {
report_tag = "IdentificationMethod"
}

r_contaminants {
// container contains minimap2 and samtools
singularity = "https://depot.galaxyproject.org/singularity/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:1679e915ddb9d6b4abda91880c4b48857d471bd8-0"
docker = "quay.io/biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:8f2087d838e5270cd83b5a016667234429f16eea-0"
singularity = "https://depot.galaxyproject.org/singularity/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:3161f532a5ea6f1dec9be5667c9efc2afdac6104-0"
docker = "quay.io/biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:3161f532a5ea6f1dec9be5667c9efc2afdac6104-0"
phix_fa = ""
homo_sapiens_fa = ""
pacbio_mg = ""
Expand Down Expand Up @@ -1083,7 +1083,7 @@ manifest {
description = """Mikrokondo"""
mainScript = 'main.nf'
nextflowVersion = '!>=23.04.0'
version = '0.2.1'
version = '0.3.0'
defaultBranch = 'main'
doi = ''
}
Expand Down
Loading

0 comments on commit 31c494a

Please sign in to comment.