forked from cyntsc/Motif_Detectives
-
Notifications
You must be signed in to change notification settings - Fork 0
/
parse_gtf.py
executable file
·61 lines (54 loc) · 1.44 KB
/
parse_gtf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#!/usr/bin/env python3
import sys
import re
# Input used when no path is supplied on the command line.
DEFAULT_GFF = "/Users/student/PFB_Brianda/Files/Caenorhabditis_elegans.WBcel235.108.chromosome.I.gff3"

# A GFF3 feature row has exactly nine tab-separated columns.
GFF_COLUMN_COUNT = 9


def _parse_attributes(description):
    """Split a GFF3 attribute column (``key=value;key=value``) into a dict.

    Items without an ``=`` (including the ``.`` placeholder used for an
    empty column) are ignored. Values are kept verbatim; percent-encoded
    characters are not decoded.
    """
    attributes = {}
    for item in description.split(";"):
        key, sep, value = item.partition("=")
        if sep:
            attributes[key.strip()] = value.strip()
    return attributes


def parse_gff3(lines):
    """Parse an iterable of GFF3 lines.

    Blank lines and lines starting with ``#`` are skipped, and parsing stops
    at a ``##FASTA`` directive because everything after it is sequence data
    rather than feature rows.

    Args:
        lines: Any iterable of text lines, e.g. an open file handle.

    Returns:
        A ``(genome, gene_feature, descr_list)`` tuple where

        * ``genome`` maps each sequence id (column 1) to a list of feature
          records, in file order. Each record is a dict with the keys
          ``gene_feature``, ``coordinates`` (``{"start": int, "end": int}``),
          ``strand``, ``description`` (raw column 9) and ``attributes``
          (column 9 parsed into a dict).
        * ``gene_feature`` maps each feature type (column 3) to the number
          of rows of that type.
        * ``descr_list`` is the raw column 9 of every feature row, in order.

    Raises:
        ValueError: If a feature row does not have nine tab-separated
            columns, or its start/end columns are not integers.
    """
    genome = {}
    gene_feature = {}
    descr_list = []

    for line_no, raw in enumerate(lines, start=1):
        # Strip only the line terminator: a bare rstrip() would also eat
        # trailing tabs and silently drop an empty final column.
        line = raw.rstrip("\r\n")
        if line.startswith("##FASTA"):
            break
        if not line.strip() or line.startswith("#"):
            continue

        fields = line.split("\t")
        if len(fields) != GFF_COLUMN_COUNT:
            raise ValueError(
                f"line {line_no}: expected {GFF_COLUMN_COUNT} tab-separated "
                f"columns, found {len(fields)}"
            )
        (seqid, _source, feature, start, end,
         _score, strand, _phase, description) = fields

        # NOTE(review): the original line here was not valid Python, so the
        # intended accumulation is unclear; a per-type count is assumed.
        gene_feature[feature] = gene_feature.get(feature, 0) + 1
        descr_list.append(description)
        genome.setdefault(seqid, []).append({
            "gene_feature": feature,
            "coordinates": {"start": int(start), "end": int(end)},
            "strand": strand,
            "description": description,
            "attributes": _parse_attributes(description),
        })

    return genome, gene_feature, descr_list


def main(argv=None):
    """Parse the GFF3 file named on the command line and print a summary.

    Args:
        argv: Argument list without the program name; defaults to
            ``sys.argv[1:]``. When empty, ``DEFAULT_GFF`` is read.

    Returns:
        Process exit status (``0`` on success).
    """
    args = sys.argv[1:] if argv is None else argv
    file_gff = args[0] if args else DEFAULT_GFF

    with open(file_gff, "r", encoding="utf-8") as gff:
        genome, gene_feature, _descr_list = parse_gff3(gff)

    for seqid, records in genome.items():
        print(f"{seqid}\t{len(records)} features")
    for feature, count in sorted(gene_feature.items()):
        print(f"{feature}\t{count}")
    return 0


if __name__ == "__main__":
    sys.exit(main())