-
Notifications
You must be signed in to change notification settings - Fork 0
/
config.yaml
57 lines (42 loc) · 2.43 KB
/
config.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#Config file for paqr annotations snakemake workflow
#Path to GTF file downloaded from GENCODE website
#On the download page, select the row with 'Content': Comprehensive gene annotation and 'Regions': CHR
gencode_gtf: gencode.vM25.annotation.gtf
#path to BED file downloaded from polyAsite database website
polyA_bed: atlas.clusters.2.0.GRCm38.96.bed
#version number of PolyASite atlas
#put 1 for first release (2014/15), 2 for second release (2019)
polyA_atlas_version: 2
#Name of directory to store output files
output_dir: mouse_new_annotations_no_tsl/
#Remove version number from transcript & gene_ids?
#e.g. ENSMUST00000082908.1 --> ENSMUST00000082908 (if "yes")
#put "yes" or "no" (in quotes)
strip_version_number: "yes"
#Name of output BED12 transcripts file (stored in output_dir)
transcripts_name: full_transcripts.mm10.vM25.no_tsl.atlas2.canonical_chr.tandem.noOverlap_strand_specific.bed
#Name of output BED12 polyA clusters file (stored in output_dir)
clusters_name: clusters.mm10.vM25.no_tsl.atlas2.canonical_chr.tandem.noOverlap_strand_specific.bed
#name of intermediate GTF file where entries for non-overlapping, multi-poly(A) site genes are written
#written to <output_dir>/<output_subdir>/<transcripts_gtf_name>
transcripts_gtf_name: non_overlapping.multi_polya.tr.gencode.mm10.vM25.no_tsl.atlas2.annotation.gtf
####--------------------------------------------
#Options for transcript support level filtering
####--------------------------------------------
#remove transcripts with 'NA' tag for transcript support level
#put "yes" or "no" (in quotes)
remove_na_transcript_support_level: "no"
#Filter at gene level for best supported transcripts by transcript_support_level
#e.g. if a gene has 3 transcripts with TSL:1 & 1 transcript with TSL:3, the TSL:3 transcript is dropped (all TSL:1 transcripts are kept)
#note: ignored if remove_na_transcript_support_level: "no" (sorting fails with NA values (as it stands...))
#put "yes" or "no" (in quotes)
gene_best_supported_transcripts: "no"
#minimum transcript support level for transcripts to be retained in final output file
#value between 1 & 5 (5 = lowest support)
#note: ignored if remove_na_transcript_support_level: "no" (sorting fails with NA values (as it stands...))
minimum_transcript_support_level: 5
######----------------------------------------------
#parameters/definitions for pipeline & intermediate files - NO NEED TO MESS WITH THESE
######----------------------------------------------
scripts_dir: scripts/
output_subdir: intermediates/