-
Notifications
You must be signed in to change notification settings - Fork 155
/
haplo
executable file
·138 lines (108 loc) · 4.77 KB
/
haplo
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
#!/usr/bin/env perl
# Copyright [2016-2024] EMBL-European Bioinformatics Institute
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
use Getopt::Long;
use FindBin qw($RealBin);
use lib $RealBin;
use lib $RealBin.'/modules';
use Bio::EnsEMBL::VEP::Haplo::Runner;
use Bio::EnsEMBL::VEP::Utils qw(get_version_string);
# Enable strictures for the remainder of the script so that misspelt variable
# names and similar silent mistakes are reported instead of ignored.
use strict;
use warnings;

# Option values parsed from the command line; passed unchanged to the Runner.
my $config = {};

# Record how many arguments were supplied before GetOptions() consumes @ARGV,
# so a bare invocation can be told apart from one that only used defaults.
my $arg_count = scalar @ARGV;

GetOptions(
    $config,

    'help',                    # displays help message

    # input options
    'config=s',                # config file name
    'input_file|i=s',          # input file name
    'format=s',                # input file format
    'output_format=s',         # output file format
    'delimiter=s',             # delimiter between fields in input

    # haplo specific
    'haplotype_frequencies=s',

    # DB options
    'species=s',               # species e.g. human, homo_sapiens
    'registry=s',              # registry file
    'host=s',                  # database host
    'port=s',                  # database port
    'user|u=s',                # database user name
    'password|pass=s',         # database password
    'db_version=i',            # Ensembl database version to use e.g. 62
    'assembly|a=s',            # assembly version to use
    'genomes',                 # automatically sets DB params for e!Genomes
    'refseq',                  # use otherfeatures RefSeq DB instead of Ensembl
    'merged',                  # use merged cache
    'all_refseq',              # report consequences on all transcripts in RefSeq cache, includes CCDS, EST etc
    'gencode_basic',           # limit to using just GenCode basic transcript set
    'is_multispecies=i',       # '1' for a multispecies database (e.g protists_euglenozoa1_collection_core_29_82_1)

    # runtime options
    'chr=s',                   # analyse only these chromosomes, e.g. 1-5,10,MT
    'phased',                  # force VCF genotypes to be interpreted as phased
    'dont_skip',               # don't skip vars that fail validation
    'transcript_filter=s',     # transcript filter

    # verbosity options
    'verbose|v',               # print out a bit more info while running
    'quiet|q',                 # print nothing to STDOUT (unless using -o stdout)

    # output options
    'output_file|o=s',         # output file name
    'warning_file=s',          # file to write warnings to
    'force_overwrite|force',   # force overwrite of output file if already exists
    'json',                    # create JSON output
    'dont_export=s',           # list of JSON keys to exclude

    # cache stuff
    'database',                # must specify this to use DB now
    'cache',                   # use cache
    'cache_version=i',         # specify a different cache version
    'show_cache_info',         # print cache info and quit
    'dir=s',                   # dir where cache is found (defaults to $HOME/.vep/)
    'dir_cache=s',             # specific directory for cache
    'offline',                 # offline mode uses minimal set of modules installed in same dir, no DB connection

    # The explicit array reference destination lets --custom be given more
    # than once, with every value collected in $config->{custom}.
    'custom=s' => ($config->{custom} ||= []), # specify custom tabixed bgzipped file with annotation
    'gff=s',                   # shortcut to --custom [file],,gff
    'gtf=s',                   # shortcut to --custom [file],,gtf
    'fasta=s',                 # file or dir containing FASTA files with reference sequence
    'sereal',                  # use Sereal instead of Storable for the cache
    'synonyms=s',              # file of chromosome synonyms

    # debug
    'debug',                   # print out debug info
) or die "ERROR: Failed to parse command-line flags\n";

# Show the usage message and stop when invoked without arguments or with
# --help. The exit is unconditional: it must not depend on whether printing
# the message succeeded, otherwise the Runner would start with no input.
if (!$arg_count || $config->{help}) {
    usage();
    exit(0);
}

# Make sure the key exists with a false value when --database was not given.
$config->{database} ||= 0;

my $runner = Bio::EnsEMBL::VEP::Haplo::Runner->new($config);
$runner->run();
# Print the Haplosaurus banner, version details and a summary of the basic
# options to the currently selected output handle.
# Takes no arguments. Returns the result of print (true on success).
sub usage {
    # $RealBin is the directory holding this script; the version string is
    # built by get_version_string() from the .version file path inside it.
    my $version_file = $RealBin.'/.version';
    my $versions     = get_version_string($version_file);

    return print <<"END";
#-------------#
# HAPLOSAURUS #
#-------------#
Versions:
$versions
Help: dev\@ensembl.org , helpdesk\@ensembl.org
Twitter: \@ensembl
Usage:
perl haplo.pl [--cache|--offline|--database] [arguments]
Basic options
=============
--help Display this message and quit
-i | --input_file Input file
-o | --output_file Output file
--force_overwrite Force overwriting of output file
--species [species] Species to use [default: "human"]
END
}
1;