forked from naumenko-sa/cre
-
Notifications
You must be signed in to change notification settings - Fork 0
/
cre.omim.inheritance.py
executable file
·54 lines (43 loc) · 1.32 KB
/
cre.omim.inheritance.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#!/bin/env python
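# Extracts OMIM inheritance modes per Ensembl gene.
# Reads ~/cre/data/ensembl_w_description.txt and ./omim_by_orion.txt and prints
# the result to stdout (presumably omim_by_orion.txt is derived from OMIM's
# genemap2.txt -- confirm).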
import re
from os.path import expanduser
# Home directory of the current user; the Ensembl description file is looked
# up relative to it further down in the script.
home = expanduser('~')

# Inheritance phrases searched for in the OMIM text, each paired with the
# abbreviation that is printed in the report.
# NOTE: 'X-linked' is also a substring of 'X-linked recessive' and
# 'X-linked dominant'.
inheritance = dict((
    ('Autosomal recessive', 'AR'),
    ('Autosomal dominant', 'AD'),
    ('X-linked recessive', 'XLR'),
    ('X-linked dominant', 'XLD'),
    ('Isolated cases', 'IC'),
    ('Mitochondrial', 'Mi'),
    ('X-linked', 'XL'),
    ('Y-linked', 'YL'),
    ('Digenic recessive', 'DR'),
    ('Digenic dominant', 'DD'),
    ('Multifactorial', 'Mu'),
    ('Somatic mosaicism', 'Smo'),
    ('Somatic mutation', 'Smu'),
))
# Lookup built from the tab-separated Ensembl description file: first column
# -> second column.  The report loop below queries it with the matched ENSG
# id and prints the value under the 'Gene_name2' header, so presumably the
# columns are Ensembl gene id and gene name -- confirm against the data file.
genes = {}
with open(home+'/cre/data/ensembl_w_description.txt','r') as ensembl_file:
    for line in ensembl_file:
        # Drop the line terminator first so it cannot end up inside the
        # stored value when the second column is the last one on the line.
        fields = line.rstrip('\r\n').split('\t')
        if len(fields) < 2:
            # Blank or tab-less line: nothing to map, and indexing the
            # second column would raise IndexError.
            continue
        genes[fields[0]] = fields[1]
# Report: for every line of omim_by_orion.txt that mentions an Ensembl gene id
# and at least one known inheritance phrase, print
#   <ENSG id> <gene name from `genes`> <comma-separated mode abbreviations>
# Lines without an ENSG id, or without any known phrase, are skipped.

# Compiled once; it is applied to every input line.
ensembl_id_re = re.compile(r'ENSG[0-9]{11}')

# Phrases are tried longest first (ties broken alphabetically) for two reasons:
#  * 'X-linked' is a substring of 'X-linked recessive' / 'X-linked dominant';
#    matching the longer phrases first and masking them keeps a plain
#    'X-linked' from being reported for text that only says e.g.
#    'X-linked recessive'.
#  * a fixed order makes the printed mode list independent of dict iteration
#    order.
# Phrases are matched as plain, case-sensitive substrings.
phrases_longest_first = sorted(inheritance, key=lambda phrase: (-len(phrase), phrase))

with open('omim_by_orion.txt', 'r') as omim_file:
    print('Ensembl_gene_id Gene_name2 Omim_inheritance')
    for line in omim_file:
        id_match = ensembl_id_re.search(line)
        if not id_match:
            continue
        gene_id = id_match.group()

        unmatched_text = line
        modes = []
        for phrase in phrases_longest_first:
            if phrase in unmatched_text:
                modes.append(inheritance[phrase])
                # Mask what was just matched with a character that occurs in
                # no phrase, so that shorter phrases only match text that is
                # not already accounted for and removing a span cannot glue
                # its neighbours into a new phrase.
                unmatched_text = unmatched_text.replace(phrase, '\0')

        if modes:
            # genes.get() yields None for ids missing from the lookup; it is
            # printed as the text 'None'.
            print('%s %s %s' % (gene_id, genes.get(gene_id), ','.join(modes)))