-
Notifications
You must be signed in to change notification settings - Fork 7
/
trf2gff.py
24 lines (21 loc) · 852 Bytes
/
trf2gff.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
# -*- coding: utf-8 -*-
# <nbformat>3.0</nbformat>
# <codecell>
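# Minimal parser for TRF (Tandem Repeats Finder) output.
# Requires TRF to be run with the -ngs flag (available from TRF version 4.07b).
# Reads the TRF output file named by the first command-line argument and
# prints one tab-separated GFF line per repeat to standard output.
# Untested; use at your own risk.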
import sys
def trf_to_gff_records(handle):
    """Yield one GFF record per repeat line of a TRF ``-ngs`` output.

    Lines starting with ``@`` name the sequence that the following repeat
    lines belong to; only the first space-separated token after the ``@`` is
    kept as the sequence name. Every other non-blank line must consist of
    exactly 17 space-separated fields, in this order:

        start, stop, period, copies, consensus_size, perc_match,
        perc_indels, align_score, perc_A, perc_C, perc_G, perc_T,
        entropy, cons_seq, repeat_seq, left_flank, right_flank

    Args:
        handle: Any iterable of text lines (an open file, a list, ...).

    Yields:
        A list of nine strings: sequence name, ``'TRF'``, a label of the form
        ``<cons_seq>_<copies>_copies``, start, stop, three ``'.'``
        placeholders, and ``Name=<label>``.

    Raises:
        ValueError: If a repeat line does not have exactly 17 fields, or if a
            repeat line appears before any ``@`` header line.
    """
    seq_name = None
    for line_no, line in enumerate(handle, 1):
        stripped = line.strip()
        if not stripped:
            # Blank lines (e.g. a trailing newline at end of file) carry no
            # data; skip them instead of failing on the field count below.
            continue
        fields = stripped.split(" ")
        if stripped.startswith('@'):
            seq_name = fields[0][1:]
            continue
        if len(fields) != 17:
            raise ValueError(
                'line %d: expected 17 space-separated fields, found %d'
                % (line_no, len(fields)))
        if seq_name is None:
            raise ValueError(
                'line %d: repeat line found before any "@" sequence header'
                % line_no)
        start, stop, copies, cons_seq = (
            fields[0], fields[1], fields[3], fields[13])
        label = cons_seq + '_' + copies + '_copies'
        yield [seq_name, 'TRF', label,
               start, stop, '.', '.', '.', 'Name=' + label]


def main(argv=None):
    """Convert the TRF output file named in ``argv[1]`` to GFF on stdout.

    Args:
        argv: Argument list in ``sys.argv`` layout; defaults to ``sys.argv``.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        sys.exit('usage: trf2gff.py <trf_ngs_output>')
    with open(argv[1]) as fh:
        for record in trf_to_gff_records(fh):
            # sys.stdout.write instead of the print statement so the script
            # runs unchanged on both Python 2 and Python 3.
            sys.stdout.write('\t'.join(record) + '\n')


if __name__ == '__main__':
    main()