forked from cyntsc/Motif_Detectives
-
Notifications
You must be signed in to change notification settings - Fork 0
/
md_fasta_parser.py
executable file
·47 lines (40 loc) · 1.03 KB
/
md_fasta_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
#!/usr/bin/env python3
import gzip
import glob
import re
import sys
# Module-level store filled by open_fasta(): chromosome label -> sequence.
# It is shared across calls, so repeated calls accumulate entries.
cgenome = {}


def open_fasta(fasta_file):
    """Load gzip-compressed FASTA file(s) into the module-level ``cgenome``.

    Args:
        fasta_file: Path or glob pattern (e.g. ``"chr*.fa.gz"``) selecting
            one or more gzip-compressed FASTA files.

    Returns:
        The module-level ``cgenome`` dict. For every matched file it holds
        one entry keyed ``"Chromosome.<label>"`` whose value is all of the
        file's non-header lines, right-stripped and concatenated. Header
        lines (starting with ``>``) are dropped, so a multi-record file
        collapses into a single sequence string.

    Notes:
        ``<label>`` is every run of the characters ``I``, ``V``, ``X`` found
        in the matched path, joined with ``", "`` (presumably Roman-numeral
        chromosome names such as ``chrIV`` -- confirm against the data set).
        A pattern that matches nothing leaves ``cgenome`` unchanged.
    """
    # Compile once; the pattern does not depend on the file being processed.
    label_regex = re.compile(r'([IVX]+)')

    for file_name in glob.glob(fasta_file):
        label = ', '.join(label_regex.findall(file_name))
        chromosome_label = 'Chromosome.' + label

        # Open the file matched by the glob, not the pattern itself:
        # opening the raw pattern fails as soon as it contains a wildcard.
        with gzip.open(file_name, 'rt') as raw:
            seqs = [line.rstrip() for line in raw if not line.startswith('>')]

        cgenome[chromosome_label] = ''.join(seqs)

    return cgenome
def main():
    """Command-line entry point: parse the given FASTA file(s) and print them.

    Expects exactly one positional argument, a path or glob pattern that is
    passed to ``open_fasta``. The resulting dict is printed to stdout and the
    process exits with status 0. When the argument is missing, the usage
    message is written to stderr and the process exits with status 1.
    """
    progname = sys.argv[0]
    usage = "\n\n\tusage: {} fasta_file\n\n\n".format(progname)

    # sys.argv always contains the program name, so a missing argument
    # means fewer than two entries.
    if len(sys.argv) < 2:
        sys.stderr.write(usage)
        sys.exit(1)

    # capture command-line arguments
    fasta_file = sys.argv[1]

    parsed_genome = open_fasta(fasta_file)
    print(parsed_genome)

    sys.exit(0)


if __name__ == "__main__":
    main()