-
Notifications
You must be signed in to change notification settings - Fork 7
/
nextflow.config
executable file
·147 lines (131 loc) · 7.73 KB
/
nextflow.config
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
/*
* -------------------------------------------------
* TRON-Bioinformatics/covigator-ngs-pipeline Nextflow config file
* -------------------------------------------------
*/
// Default SARS-CoV-2 resources shipped with the pipeline; every path below is built from $baseDir.
params {
    // reference genome, gene annotations and SnpEff database settings
    sarscov2_reference = "$baseDir/reference/Sars_cov_2.ASM985889v3.dna.toplevel.fa"
    sarscov2_gff = "$baseDir/reference/Sars_cov_2.ASM985889v3.101.gff3"
    sarscov2_snpeff_data = "$baseDir/reference/snpeff/"
    sarscov2_snpeff_config = "$baseDir/reference/snpeff/snpEff.config"
    sarscov2_snpeff_organism = "Sars_cov_2.ASM985889v3.101"

    // VCF of problematic sites
    problematic_sites = "$baseDir/reference/problematic_sites_sarsCov2.vcf.gz"

    // conservation annotations: one BED file plus one header file per conservation track
    conservation_sarscov2 = "$baseDir/reference/wuhCor1.mutDepletionConsHMM.bed.gz"
    conservation_sarscov2_header = "$baseDir/reference/wuhCor1.mutDepletionConsHMM.header.txt"
    conservation_sarbecovirus = "$baseDir/reference/wuhCor1.mutDepletionSarbecovirusConsHMM.bed.gz"
    conservation_sarbecovirus_header = "$baseDir/reference/wuhCor1.mutDepletionSarbecovirusConsHMM.header.txt"
    conservation_vertebrate = "$baseDir/reference/wuhCor1.mutDepletionVertebrateCoVConsHMM.bed.gz"
    conservation_vertebrate_header = "$baseDir/reference/wuhCor1.mutDepletionVertebrateCoVConsHMM.header.txt"

    // Pfam domain annotations: names and descriptions, each with its header file
    pfam_names = "$baseDir/reference/pfam_names.bed.gz"
    pfam_names_header = "$baseDir/reference/pfam_names.header.txt"
    pfam_descriptions = "$baseDir/reference/pfam_descriptions.bed.gz"
    pfam_descriptions_header = "$baseDir/reference/pfam_descriptions.header.txt"
}
// Execution profiles, selected on the command line with -profile.
profiles {

    // Turns conda on (with mamba); params.enable_conda is set alongside conda.enabled.
    conda {
        params.enable_conda = true
        conda.enabled = true
        conda.useMamba = true
    }

    // Echoes the host name before each process script.
    debug {
        process.beforeScript = 'echo $HOSTNAME'
    }

    // Small resource footprint for tests; timeline, report, trace and DAG outputs are switched off.
    test {
        params.cpus = 1
        params.memory = "3g"
        timeline.enabled = false
        report.enabled = false
        trace.enabled = false
        dag.enabled = false
    }

    // Test inputs for the FASTA (assembly) entry point.
    test_fasta {
        params.name = "test"
        params.fasta = "$baseDir/tests/test_data/test_data.fasta"
        params.output = "covigator_fasta_test"
    }

    // Test inputs for the FASTQ (reads) entry point; only the first FASTQ is set.
    test_fastq {
        params.name = "test"
        params.fastq1 = "$baseDir/tests/test_data/test_data_1.fastq.gz"
        params.output = "covigator_fastq_test"
    }
}
// Exported to the task environment so that Python libraries installed locally by the user
// do not conflict with those in the container
env.PYTHONNOUSERSITE = 1
// Run process scripts with bash -euo pipefail so that exit codes from upstream
// commands in a pipe are captured
process {
    shell = ['/bin/bash', '-euo', 'pipefail']
}

// NOTE(review): presumably removes the work directory files once a run succeeds — confirm against the Nextflow docs
cleanup = true

// Time allowed for creating a conda environment
conda {
    createTimeout = '1 h'
}

// Pipeline version; referenced by the manifest and by the help message below
VERSION = '0.17.0'
// Pipeline metadata
manifest {
    // identity
    name = 'TRON-Bioinformatics/covigator-ngs-pipeline'
    description = 'A Nextflow pipeline to process NGS data from SARS-CoV-2'
    author = 'Pablo Riesgo-Ferreiro, Patrick Sorn, Thomas Bukur'
    homePage = 'https://github.com/TRON-Bioinformatics/covigator-ngs-pipeline'

    // version comes from the VERSION setting defined above the manifest
    version = VERSION

    // entry point and Nextflow version constraint
    mainScript = 'main.nf'
    nextflowVersion = '>=19.10.0'
}
// Usage text for the pipeline; the version shown is interpolated from VERSION.
params.help_message = """
Covigator NGS pipeline v${VERSION}
Usage:
nextflow run tron-bioinformatics/covigator-ngs-pipeline -profile conda --help
Input:
* --fastq1: the first input FASTQ file (not compatible with --fasta, nor --vcf)
* --fasta: the FASTA file containing the assembly sequence (not compatible with --fastq1, nor --vcf)
* --vcf: the VCF file containing mutations to analyze (not compatible with --fastq1, nor --fasta)
* --bam: the BAM file containing reads to annotate VAFs on a VCF (not compatible with --fastq1, nor --fasta)
* --bai: the BAI index for a BAM file (not compatible with --fastq1, nor --fasta)
* --name: the sample name, output files will be named after this name
* --output: the folder where to publish output
* --input_fastqs_list: alternative to --name and --fastq1 for batch processing
* --library: required only when using --input_fastqs_list
* --input_fastas_list: alternative to --name and --fasta for batch processing
* --input_vcfs_list: alternative to --name and --vcf for batch processing
* --input_bams_list: alternative to --name, --vcf, --bam and --bai for batch processing
Optional input only required to use a custom reference:
* --reference: the reference genome FASTA file, *.fai, *.dict and bwa indexes are required.
* --gff: the GFFv3 gene annotations file (required to run iVar and to phase mutations from all variant callers)
* --snpeff_data: path to the SnpEff data folder, it will be useful to use the pipeline on other virus than SARS-CoV-2
* --snpeff_config: path to the SnpEff config file, it will be useful to use the pipeline on other virus than SARS-CoV-2
* --snpeff_organism: organism to annotate with SnpEff, it will be useful to use the pipeline on other virus than SARS-CoV-2
* --reference_generate: Run reference generate to prepare a reusable custom reference
Optional input:
* --fastq2: the second input FASTQ file
* --primers: a BED file containing the primers used during library preparation. If provided primers are trimmed from the reads. Only applicable to FASTQs.
* --min_base_quality: minimum base call quality to take a base into account for variant calling (default: 20)
* --min_mapping_quality: minimum mapping quality to take a read into account for variant calling (default: 20)
* --vafator_min_base_quality: minimum base call quality to take a base into account for VAF annotation (default: 0)
* --vafator_min_mapping_quality: minimum mapping quality to take a read into account for VAF annotation (default: 0)
* --low_frequency_variant_threshold: VAF threshold to mark a variant as low frequency (default: 0.02)
* --subclonal_variant_threshold: VAF superior threshold to mark a variant as subclonal (default: 0.5)
* --lq_clonal_variant_threshold: VAF superior threshold to mark a variant as low quality clonal (default: 0.8)
* --memory: the amount of memory used by each job (default: 3g)
* --cpus: the number of CPUs used by each job (default: 1)
* --skip_lofreq: skips calling variants with LoFreq
* --skip_gatk: skips calling variants with GATK
* --skip_bcftools: skips calling variants with BCFTools
* --skip_ivar: skips calling variants with iVar
* --skip_pangolin: skips lineage determination with pangolin
* --match_score: global alignment match score, only applicable for assemblies (default: 2)
* --mismatch_score: global alignment mismatch score, only applicable for assemblies (default: -1)
* --open_gap_score: global alignment open gap score, only applicable for assemblies (default: -3)
* --extend_gap_score: global alignment extend gap score, only applicable for assemblies (default: -0.1)
* --skip_sarscov2_annotations: skip some of the SARS-CoV-2 specific annotations (default: false)
* --keep_intermediate: keep intermediate files (ie: BAM files and intermediate VCF files)
* --args_bcftools_mpileup: additional arguments for bcftools mpileup command (eg: --args_bcftools_mpileup='--ignore-overlaps')
* --args_bcftools_call: additional arguments for bcftools call command (eg: --args_bcftools_call='--something')
* --args_lofreq: additional arguments for lofreq command (eg: --args_lofreq='--something')
* --args_gatk: additional arguments for gatk command (eg: --args_gatk='--something')
* --args_ivar_samtools: additional arguments for ivar samtools mpileup command (eg: --args_ivar_samtools='--ignore-overlaps')
* --args_ivar: additional arguments for ivar command (eg: --args_ivar='--something')
Output:
* Output a VCF file for each of BCFtools, GATK and LoFreq when FASTQ files are
provided or a single VCF obtained from a global alignment when a FASTA file is provided
* Output a TSV file output from iVar
* Only when FASTQs are provided:
* FASTP statistics
* Depth and breadth of coverage analysis results
* Picard's deduplication metrics
"""