# An edited copy of this file should be placed under $HOME/.ngiconfig/ngi_config.yaml
# or its path exported as the environment variable NGI_CONFIG
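# A minimal sketch of the second option, assuming a bash-like shell (the path below is just the
# example location from the line above, not a requirement):
#   export NGI_CONFIG=$HOME/.ngiconfig/ngi_config.yaml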
analysis:
    workflows:
        NGI:
            analysis_engine: ngi_pipeline.engines.piper_ngi
        mate_pair:
            analysis_engine: ngi_pipeline.engines.de_novo_pipeline
        RNA:
            analysis_engine: ngi_pipeline.engines.bcbio_ngi
    best_practice_analysis:
        whole_genome_reseq:
            analysis_engine: ngi_pipeline.engines.piper_ngi
        IGN:
            analysis_engine: ngi_pipeline.engines.piper_ngi
        qc:
            analysis_engine: ngi_pipeline.engines.qc_ngi
        wgs_germline:
            analysis_engine: ngi_pipeline.engines.sarek
        exome_germline:
            analysis_engine: ngi_pipeline.engines.sarek
    # top_dir is the directory the pipeline uses as a starting point to build up the analysis run;
    # see ngi_pipeline/utils/filesystem.py for details.
    # On Nestor the default is /proj/a2014205/nobackup/NGI/analysis_ready
    top_dir: /lupus/ngi/staging/wildwest/ngi2016001/nobackup/NGI
    sthlm_root: ngi2016003
    upps_root: ngi2016001
    # On Nestor it is simply /proj
    base_root: /lupus/ngi/staging/wildwest
database:
    # SQLite file recording what/where/how things are running (a local state machine that backs up Charon in case of network failure)
    # - On Nestor it is at /proj/a2014205/ngi_resources/record_tracking_database_nestor.sql
    # - If you run without an existing database, an empty one is created with the following schema:
    #   sqlite> .fullschema
    #   CREATE TABLE sampleanalysis (
    #       project_id VARCHAR(50) NOT NULL,
    #       project_name VARCHAR(50),
    #       project_base_path VARCHAR(100),
    #       sample_id VARCHAR(50) NOT NULL,
    #       workflow VARCHAR(50) NOT NULL,
    #       engine VARCHAR(50),
    #       analysis_dir VARCHAR(100),
    #       process_id INTEGER,
    #       slurm_job_id INTEGER,
    #       PRIMARY KEY (project_id, sample_id, workflow)
    #   );
    # Compulsory to define. In the distributed template it is left commented out so that you cannot
    # accidentally overwrite the production version below and are forced to edit the config file.
    record_tracking_db_path: /lupus/ngi/staging/wildwest/ngi2016001/private/db/record_tracking_database.sql
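    # To inspect the tracked analyses by hand you can query this database directly; a minimal sketch,
    # assuming the sqlite3 command-line client is available and the path above is used:
    #   sqlite3 /lupus/ngi/staging/wildwest/ngi2016001/private/db/record_tracking_database.sql \
    #     "SELECT project_name, sample_id, workflow, engine, slurm_job_id FROM sampleanalysis;"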
environment:
    project_id: ngi2016001
    # Directory containing scripts like ngi_pipeline_start.py, print_running_jobs.py etc.
    # On Nestor the production code is at /proj/a2014205/software/ngi_pipeline/scripts
    ngi_scripts_dir: /lupus/ngi/staging/latest/sw/ngi_pipeline/scripts
    conda_env: ngi_pipeline
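    # The scripts above are typically run from within this conda environment; a minimal sketch,
    # assuming a standard conda setup and that the environment named above already exists:
    #   conda activate ngi_pipeline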
    # Flowcell directories; the path string must contain either /a2014205/ or /a2015179/
    # (see ngi_pipeline/ngi_pipeline/conductor/flowcell.py:setup_analysis_directory_structure() for details).
    # On Nestor the default values are /proj/a2014205/archive and /proj/a2015179/archive
    flowcell_inbox:
        - /lupus/ngi/staging/wildwest/ngi2016001/incoming
        - /lupus/ngi/staging/wildwest/ngi2016003/incoming
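    # Each inbox is expected to contain incoming Illumina run folders; purely for illustration,
    # the test flowcell referenced in the test_data section further down would live at e.g.
    #   /lupus/ngi/staging/wildwest/ngi2016001/incoming/150424_ST-E00214_0031_BH2WY7CCXX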
logging:
    # The log file itself is compulsory to define, or you will get a nasty exception.
    # To make sure you define it yourself rather than overwrite the production one below, it is normally left commented out.
    # The default location is /proj/a2014205/ngi_resources/ngi_pipeline.log
    log_file: /lupus/ngi/staging/wildwest/ngi2016001/private/log/ngi_pipeline.test.log
paths: # Hard code paths here if you are that kind of a person
    binaries:
        #bowtie2:
        #fastqc:
        #fastq_screen:
    references:
        #log: /base/to/proj/a2010002/data/log
        #store_dir: /base/to/proj/a2010002/archive
piper:
    # The engine we usually run - further engines should also be configured like this.
    # Can also be set as the environment variable $PIPER_QSCRIPTS_DIR
    # The Nestor default location is /proj/a2014205/software/piper/qscripts
    path_to_piper_qscripts: /base/to/proj/piper/qscripts
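    # A minimal sketch of the environment-variable alternative mentioned above, assuming a bash-like
    # shell (the path is the same placeholder used for path_to_piper_qscripts):
    #   export PIPER_QSCRIPTS_DIR=/base/to/proj/piper/qscripts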
    load_modules:
        - java/sun_jdk1.7.0_25
        - R/2.15.0
    threads: 16
    job_walltime:
        merge_process_variantcall: "10-00:00:00"
    #sample:
    #    required_autosomal_coverage: 28.4
    shell_jobrunner: Shell
    #shell_jobrunner: ParallelShell --super_charge --ways_to_split 4
    #jobNative:
    #    - arg1
    #    - arg2
    #    - arg3
sarek:
    tag: 2.6
    tools:
        - haplotypecaller
        - snpeff
    genomes_base_paths:
        GRCh37: /sw/data/uppnex/ToolBox/ReferenceAssemblies/hg38make/bundle/2.8/b37/
        GRCh38: /sw/data/uppnex/ToolBox/hg38bundle/
qc:
    # These qc modules are related to pre-analysis QC runs
    load_modules:
        - bioinfo-tools
    fastqc:
        load_modules:
            - FastQC
        threads: 16
    fastq_screen:
        config_path: "/proj/a2014205/ngi_resources/fastq_screen.nestor.conf"
        load_modules:
            - bowtie2
            - fastq_screen
        subsample_reads: 200000
        threads: 1
slurm:
    extra_params:
        "--qos": "seqver"
    cores: 16
supported_genomes:
    #"GRCh37": "/apus/data/uppnex/reference/Homo_sapiens/GRCh37/concat/Homo_sapiens.GRCh37.57.dna.concat.fa"
    "GRCh37": /proj/a2014205/piper_references/gatk_bundle/2.8/b37/human_g1k_v37.fasta
    "GRCm38": "/apus/data/uppnex/reference/Mus_musculus/GRCm38/concat/Mus_musculus.GRCm38.69.dna.concat.fa"
    "rn4": None
    "saccer2": None
    "dm3": None
    "tair9": None
    "xentro2": None
    "ws210": None
    "canfam3": None
#project:
#    INBOX: /proj/a2010002/archive
test_data:
    workflows:
        whole_genome_reseq:
            test_project:
                project_id: P0000
                project_name: "Y.Mom_15_01"
                bpa: "whole_genome_reseq"
            customize_config:
                analysis:
                    top_dir: /proj/a2014205/nobackup/NGI/test_data/
                charon:
                    charon_base_url: http://charon-dev.scilifelab.se
            local_files:
                flowcell: /proj/a2014205/nobackup/NGI/test_data/whole_genome_reseq/150424_ST-E00214_0031_BH2WY7CCXX
                #project: /proj/a2014205/nobackup/NGI/test_data/whole_genome_reseq/DATA/P0000
                vcf: /proj/a2014205/nobackup/NGI/test_data/whole_genome_reseq/vcf/
mail:
    # For testing you can change the default recipient here.
    # By default (when this value is not set) mails are sent to
    # "[email protected]" (see ngi_pipeline/utils/communication.py for details).
    # If you do not want any mail, use the "quiet" option, e.g.
    # scripts/print_running_jobs.py -q
    recipient: [email protected]