Skip to content

Commit

Permalink
Fix error in load_gff when no source is specified
Browse files: browse the repository at this point in the history
  • Loading branch information
abretaud committed Dec 11, 2018
1 parent 8201a2d commit 7f7acac
Show file tree
Hide file tree
Showing 4 changed files with 450 additions and 1 deletion.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ $ chakin feature load_fasta \
- 2.2.5
- Added support for units in expression loaders
- Fix error in load_gff when no source is specified
- 2.2.4
- Fix broken --skip_missing option for load_go
Expand Down
4 changes: 3 additions & 1 deletion chado/feature/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -741,7 +741,9 @@ def _load_gff_feature_with_children(self, rec, f, analysis_id, organism_id, re_p
pep_uname = detected_protein_id
else:
pep_uname = added_feat['uniquename'] + '-protein'
polypeptide = SeqFeature(FeatureLocation(min_cds, max_cds), type="polypeptide", strand=f.location.strand, qualifiers={"source": subrna.qualifiers['source'], 'ID': [pep_uname], 'Name': [added_feat['name']]})
polypeptide = SeqFeature(FeatureLocation(min_cds, max_cds), type="polypeptide", strand=f.location.strand, qualifiers={'ID': [pep_uname], 'Name': [added_feat['name']]})
if 'source' in subrna.qualifiers:
polypeptide.qualifiers['source'] = subrna.qualifiers['source']

protein_seq = None
if not no_seq_compute and len(rec.seq) > 0 and str(rec.seq)[0:10] != "??????????":
Expand Down
172 changes: 172 additions & 0 deletions test-data/annot_nosource.gff
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
##gff-version 3
##sequence-region scaffold00001 4058460 4062210
scaffold00001 . supercontig 1 5927163 . . . Name=scaffold00001;ID=scaffold00001
scaffold00001 . gene 3058460 3062210 . + . ID=orange1.1g015633m.g;Name=orange1.1g015633m.g
scaffold00001 . mRNA 3058460 3062210 . + . ID=PAC:18136238;Name=orange1.1g015633m;PACid=18136238;Parent=orange1.1g015633m.g
scaffold00001 . five_prime_UTR 3058460 3058898 . + . ID="an_utr_with_two_parents";Parent=PAC:18136238,PAC:18136239;PACid=18136238
scaffold00001 . five_prime_UTR 3059019 3059074 . + . Parent=PAC:18136238;PACid=18136238
scaffold00001 . five_prime_UTR 3059172 3059234 . + . Parent=PAC:18136238;PACid=18136238
scaffold00001 . CDS 3059235 3059330 . + 0 Parent=PAC:18136238;PACid=18136238
scaffold00001 . CDS 3059422 3059514 . + 0 Parent=PAC:18136238;PACid=18136238
scaffold00001 . CDS 3059600 3059659 . + 0 Parent=PAC:18136238;PACid=18136238
scaffold00001 . CDS 3059790 3060062 . + 0 Parent=PAC:18136238;PACid=18136238
scaffold00001 . CDS 3060285 3060359 . + 0 Parent=PAC:18136238;PACid=18136238
scaffold00001 . CDS 3060480 3060536 . + 0 Parent=PAC:18136238;PACid=18136238
scaffold00001 . CDS 3060625 3060765 . + 0 Parent=PAC:18136238;PACid=18136238
scaffold00001 . CDS 3060857 3060907 . + 0 Parent=PAC:18136238;PACid=18136238
scaffold00001 . CDS 3061250 3061345 . + 0 Parent=PAC:18136238;PACid=18136238
scaffold00001 . CDS 3061417 3061500 . + 0 Parent=PAC:18136238;PACid=18136238
scaffold00001 . CDS 3061617 3061719 . + 0 Parent=PAC:18136238;PACid=18136238
scaffold00001 . CDS 3061823 3061905 . + 2 Parent=PAC:18136238;PACid=18136238
scaffold00001 . three_prime_UTR 3061906 3062210 . + . Parent=PAC:18136238;PACid=18136238
scaffold00001 . mRNA 3058460 3062210 . + . ID=PAC:18136239;Name=orange1.1g015646m;PACid=18136239;Parent=orange1.1g015633m.g
scaffold00001 . five_prime_UTR 3059016 3059087 . + . Parent=PAC:18136239;PACid=18136239
scaffold00001 . five_prime_UTR 3059172 3059234 . + . Parent=PAC:18136239;PACid=18136239
scaffold00001 . CDS 3059235 3059330 . + 0 Parent=PAC:18136239;PACid=18136239
scaffold00001 . CDS 3059422 3059514 . + 0 Parent=PAC:18136239;PACid=18136239
scaffold00001 . CDS 3059600 3059659 . + 0 Parent=PAC:18136239;PACid=18136239
scaffold00001 . CDS 3059790 3060062 . + 0 Parent=PAC:18136239;PACid=18136239
scaffold00001 . CDS 3060285 3060359 . + 0 Parent=PAC:18136239;PACid=18136239
scaffold00001 . CDS 3060480 3060536 . + 0 Parent=PAC:18136239;PACid=18136239
scaffold00001 . CDS 3060625 3060765 . + 0 Parent=PAC:18136239;PACid=18136239
scaffold00001 . CDS 3060857 3060907 . + 0 Parent=PAC:18136239;PACid=18136239
scaffold00001 . CDS 3061250 3061345 . + 0 Parent=PAC:18136239;PACid=18136239
scaffold00001 . CDS 3061417 3061500 . + 0 Parent=PAC:18136239;PACid=18136239
scaffold00001 . CDS 3061617 3061719 . + 0 Parent=PAC:18136239;PACid=18136239
scaffold00001 . CDS 3061823 3061905 . + 2 Parent=PAC:18136239;PACid=18136239
scaffold00001 . three_prime_UTR 4061906 4062210 . + . Parent=PAC:18136239;PACid=18136239
scaffold00001 . gene 4058460 4062210 . + . ID=orange1.1g015632m.g;Name=orange1.1g015632m.g;Alias=some-synonym,another synonym;Dbxref=GO:0061611,FOOBAR:6528B;Gap=BLABLA,BLOBLO;Note=that's fantastic,really;Poutrelle=lapinou,test
scaffold00001 . mRNA 4058460 4062210 . + . ID=PAC:18136217;Name=orange1.1g015632m;PACid=18136217;Parent=orange1.1g015632m.g
scaffold00001 . five_prime_UTR 4058460 4058898 . + . Parent=PAC:18136217;PACid=18136217
scaffold00001 . five_prime_UTR 4059019 4059074 . + . Parent=PAC:18136217;PACid=18136217
scaffold00001 . five_prime_UTR 4059172 4059234 . + . Parent=PAC:18136217;PACid=18136217
scaffold00001 . CDS 4059235 4059330 . + 0 Parent=PAC:18136217;PACid=18136217
scaffold00001 . CDS 4059422 4059514 . + 0 Parent=PAC:18136217;PACid=18136217
scaffold00001 . CDS 4059600 4059659 . + 0 Parent=PAC:18136217;PACid=18136217
scaffold00001 . CDS 4059790 4060062 . + 0 Parent=PAC:18136217;PACid=18136217
scaffold00001 . CDS 4060285 4060359 . + 0 Parent=PAC:18136217;PACid=18136217
scaffold00001 . CDS 4060480 4060536 . + 0 Parent=PAC:18136217;PACid=18136217
scaffold00001 . CDS 4060625 4060765 . + 0 Parent=PAC:18136217;PACid=18136217
scaffold00001 . CDS 4060857 4060907 . + 0 Parent=PAC:18136217;PACid=18136217
scaffold00001 . CDS 4061250 4061345 . + 0 Parent=PAC:18136217;PACid=18136217
scaffold00001 . CDS 4061417 4061500 . + 0 Parent=PAC:18136217;PACid=18136217
scaffold00001 . CDS 4061617 4061719 . + 0 Parent=PAC:18136217;PACid=18136217
scaffold00001 . CDS 4061823 4061905 . + 2 Parent=PAC:18136217;PACid=18136217
scaffold00001 . three_prime_UTR 4061906 4062210 . + . Parent=PAC:18136217;PACid=18136217
scaffold00001 . mRNA 4058460 4062210 . + . ID=PAC:18136218;Name=orange1.1g015645m;PACid=18136218;Parent=orange1.1g015632m.g
scaffold00001 . five_prime_UTR 4058460 4058898 . + . Parent=PAC:18136218;PACid=18136218
scaffold00001 . five_prime_UTR 4059016 4059087 . + . Parent=PAC:18136218;PACid=18136218
scaffold00001 . five_prime_UTR 4059172 4059234 . + . Parent=PAC:18136218;PACid=18136218
scaffold00001 . CDS 4059235 4059330 . + 0 Parent=PAC:18136218;PACid=18136218
scaffold00001 . CDS 4059422 4059514 . + 0 Parent=PAC:18136218;PACid=18136218
scaffold00001 . CDS 4059600 4059659 . + 0 Parent=PAC:18136218;PACid=18136218
scaffold00001 . CDS 4059790 4060062 . + 0 Parent=PAC:18136218;PACid=18136218
scaffold00001 . CDS 4060285 4060359 . + 0 Parent=PAC:18136218;PACid=18136218
scaffold00001 . CDS 4060480 4060536 . + 0 Parent=PAC:18136218;PACid=18136218
scaffold00001 . CDS 4060625 4060765 . + 0 Parent=PAC:18136218;PACid=18136218
scaffold00001 . CDS 4060857 4060907 . + 0 Parent=PAC:18136218;PACid=18136218
scaffold00001 . CDS 4061250 4061345 . + 0 Parent=PAC:18136218;PACid=18136218
scaffold00001 . CDS 4061417 4061500 . + 0 Parent=PAC:18136218;PACid=18136218
scaffold00001 . CDS 4061617 4061719 . + 0 Parent=PAC:18136218;PACid=18136218
scaffold00001 . CDS 4061823 4061905 . + 2 Parent=PAC:18136218;PACid=18136218
scaffold00001 . three_prime_UTR 4061906 4062210 . + . Parent=PAC:18136218;PACid=18136218
scaffold00001 . mRNA 4058760 4062210 . + . ID=PAC:18136219;Name=orange1.1g015615m;PACid=18136219;Parent=orange1.1g015632m.g;Alias=some-synonym,another synonym;Dbxref=GO:0061621,FOOBARXX:6528A
scaffold00001 . five_prime_UTR 4058760 4058898 . + . Parent=PAC:18136219;PACid=18136219;Poutrelle=lapino,lapino
scaffold00001 . five_prime_UTR 4059172 4059234 . + . Parent=PAC:18136219;PACid=18136219
scaffold00001 . CDS 4059235 4059330 . + 0 Parent=PAC:18136219;PACid=18136219
scaffold00001 . CDS 4059422 4059514 . + 0 Parent=PAC:18136219;PACid=18136219
scaffold00001 . CDS 4059600 4059659 . + 0 Parent=PAC:18136219;PACid=18136219
scaffold00001 . CDS 4059790 4060062 . + 0 Parent=PAC:18136219;PACid=18136219
scaffold00001 . CDS 4060285 4060359 . + 0 Parent=PAC:18136219;PACid=18136219
scaffold00001 . CDS 4060480 4060536 . + 0 Parent=PAC:18136219;PACid=18136219
scaffold00001 . CDS 4060625 4060765 . + 0 Parent=PAC:18136219;PACid=18136219
scaffold00001 . CDS 4060857 4060907 . + 0 Parent=PAC:18136219;PACid=18136219
scaffold00001 . CDS 4061250 4061345 . + 0 Parent=PAC:18136219;PACid=18136219
scaffold00001 . CDS 4061417 4061500 . + 0 Parent=PAC:18136219;PACid=18136219
scaffold00001 . CDS 4061617 4061719 . + 0 Parent=PAC:18136219;PACid=18136219
scaffold00001 . CDS 4061823 4061905 . + 2 Parent=PAC:18136219;PACid=18136219
scaffold00001 . three_prime_UTR 4061906 4062210 . + . Parent=PAC:18136219;PACid=18136219
scaffold00001 . mRNA 4058460 4062210 . + . ID=PAC:18136220;Name=orange1.1g015662m;PACid=18136220;Parent=orange1.1g015632m.g
scaffold00001 . five_prime_UTR 4058460 4059234 . + . Parent=PAC:18136220;PACid=18136220
scaffold00001 . CDS 4059235 4059330 . + 0 Parent=PAC:18136220;PACid=18136220
scaffold00001 . CDS 4059422 4059514 . + 0 Parent=PAC:18136220;PACid=18136220
scaffold00001 . CDS 4059600 4059659 . + 0 Parent=PAC:18136220;PACid=18136220
scaffold00001 . CDS 4059790 4060062 . + 0 Parent=PAC:18136220;PACid=18136220
scaffold00001 . CDS 4060285 4060359 . + 0 Parent=PAC:18136220;PACid=18136220
scaffold00001 . CDS 4060480 4060536 . + 0 Parent=PAC:18136220;PACid=18136220
scaffold00001 . CDS 4060625 4060765 . + 0 Parent=PAC:18136220;PACid=18136220
scaffold00001 . CDS 4060857 4060907 . + 0 Parent=PAC:18136220;PACid=18136220
scaffold00001 . CDS 4061250 4061345 . + 0 Parent=PAC:18136220;PACid=18136220
scaffold00001 . CDS 4061417 4061500 . + 0 Parent=PAC:18136220;PACid=18136220
scaffold00001 . CDS 4061617 4061719 . + 0 Parent=PAC:18136220;PACid=18136220
scaffold00001 . CDS 4061823 4061905 . + 2 Parent=PAC:18136220;PACid=18136220
scaffold00001 . three_prime_UTR 4061906 4062210 . + . Parent=PAC:18136220;PACid=18136220
scaffold00001 . mRNA 4058460 4062210 . + . ID=PAC:18136221;Name=orange1.1g017341m;PACid=18136221;Parent=orange1.1g015632m.g
scaffold00001 . five_prime_UTR 4058460 4059415 . + . Parent=PAC:18136221;PACid=18136221
scaffold00001 . CDS 4059416 4059514 . + 0 Parent=PAC:18136221;PACid=18136221
scaffold00001 . CDS 4059600 4059659 . + 0 Parent=PAC:18136221;PACid=18136221
scaffold00001 . CDS 4059790 4060062 . + 0 Parent=PAC:18136221;PACid=18136221
scaffold00001 . CDS 4060285 4060359 . + 0 Parent=PAC:18136221;PACid=18136221
scaffold00001 . CDS 4060480 4060536 . + 0 Parent=PAC:18136221;PACid=18136221
scaffold00001 . CDS 4060625 4060765 . + 0 Parent=PAC:18136221;PACid=18136221
scaffold00001 . CDS 4060857 4060907 . + 0 Parent=PAC:18136221;PACid=18136221
scaffold00001 . CDS 4061250 4061345 . + 0 Parent=PAC:18136221;PACid=18136221
scaffold00001 . CDS 4061417 4061500 . + 0 Parent=PAC:18136221;PACid=18136221
scaffold00001 . CDS 4061617 4061719 . + 0 Parent=PAC:18136221;PACid=18136221
scaffold00001 . CDS 4061823 4061905 . + 2 Parent=PAC:18136221;PACid=18136221
scaffold00001 . three_prime_UTR 4061906 4062210 . + . Parent=PAC:18136221;PACid=18136221
scaffold00001 . mRNA 4058460 4062210 . + . ID=PAC:18136222;Name=orange1.1g018514m;PACid=18136222;Parent=orange1.1g015632m.g
scaffold00001 . five_prime_UTR 4058460 4058898 . + . Parent=PAC:18136222;PACid=18136222
scaffold00001 . five_prime_UTR 4059172 4059234 . + . Parent=PAC:18136222;PACid=18136222
scaffold00001 . CDS 4059235 4059330 . + 0 Parent=PAC:18136222;PACid=18136222
scaffold00001 . CDS 4059422 4059514 . + 0 Parent=PAC:18136222;PACid=18136222
scaffold00001 . CDS 4059600 4059659 . + 0 Parent=PAC:18136222;PACid=18136222
scaffold00001 . CDS 4059790 4060062 . + 0 Parent=PAC:18136222;PACid=18136222
scaffold00001 . CDS 4060285 4060359 . + 0 Parent=PAC:18136222;PACid=18136222
scaffold00001 . CDS 4060480 4060536 . + 0 Parent=PAC:18136222;PACid=18136222
scaffold00001 . CDS 4060625 4060765 . + 0 Parent=PAC:18136222;PACid=18136222
scaffold00001 . CDS 4060857 4060907 . + 0 Parent=PAC:18136222;PACid=18136222
scaffold00001 . CDS 4061250 4061345 . + 0 Parent=PAC:18136222;PACid=18136222
scaffold00001 . CDS 4061417 4061500 . + 0 Parent=PAC:18136222;PACid=18136222
scaffold00001 . CDS 4061607 4061645 . + 0 Parent=PAC:18136222;PACid=18136222
scaffold00001 . three_prime_UTR 4061646 4061719 . + . Parent=PAC:18136222;PACid=18136222
scaffold00001 . three_prime_UTR 4061823 4062210 . + . Parent=PAC:18136222;PACid=18136222
scaffold00001 . mRNA 4058460 4061214 . + . ID=PAC:18136223;Name=orange1.1g022520m;PACid=18136223;Parent=orange1.1g015632m.g
scaffold00001 . five_prime_UTR 4058460 4058898 . + . Parent=PAC:18136223;PACid=18136223
scaffold00001 . five_prime_UTR 4059172 4059234 . + . Parent=PAC:18136223;PACid=18136223
scaffold00001 . CDS 4059235 4059330 . + 0 Parent=PAC:18136223;PACid=18136223
scaffold00001 . CDS 4059422 4059514 . + 0 Parent=PAC:18136223;PACid=18136223
scaffold00001 . CDS 4059600 4059659 . + 0 Parent=PAC:18136223;PACid=18136223
scaffold00001 . CDS 4059790 4060062 . + 0 Parent=PAC:18136223;PACid=18136223
scaffold00001 . CDS 4060285 4060359 . + 0 Parent=PAC:18136223;PACid=18136223
scaffold00001 . CDS 4060480 4060536 . + 0 Parent=PAC:18136223;PACid=18136223
scaffold00001 . CDS 4060625 4060765 . + 0 Parent=PAC:18136223;PACid=18136223
scaffold00001 . CDS 4060857 4060952 . + 0 Parent=PAC:18136223;PACid=18136223
scaffold00001 . three_prime_UTR 4060953 4061214 . + . Parent=PAC:18136223;PACid=18136223
scaffold00001 . mRNA 4058460 4062210 . + . ID=PAC:18136224;Name=orange1.1g022799m;PACid=18136224;Parent=orange1.1g015632m.g
scaffold00001 . five_prime_UTR 4058460 4058898 . + . Parent=PAC:18136224;PACid=18136224
scaffold00001 . five_prime_UTR 4059172 4059873 . + . Parent=PAC:18136224;PACid=18136224
scaffold00001 . CDS 4059874 4060062 . + 0 Parent=PAC:18136224;PACid=18136224
scaffold00001 . CDS 4060480 4060536 . + 0 Parent=PAC:18136224;PACid=18136224
scaffold00001 . CDS 4060285 4060359 . + 0 Parent=PAC:18136224;PACid=18136224
scaffold00001 . CDS 4060857 4060907 . + 0 Parent=PAC:18136224;PACid=18136224
scaffold00001 . CDS 4060625 4060765 . + 0 Parent=PAC:18136224;PACid=18136224
scaffold00001 . CDS 4061250 4061345 . + 0 Parent=PAC:18136224;PACid=18136224
scaffold00001 . CDS 4061617 4061719 . + 0 Parent=PAC:18136224;PACid=18136224
scaffold00001 . CDS 4061417 4061500 . + 0 Parent=PAC:18136224;PACid=18136224
scaffold00001 . CDS 4061823 4061905 . + 2 Parent=PAC:18136224;PACid=18136224
scaffold00001 . three_prime_UTR 4061906 4062210 . + . Parent=PAC:18136224;PACid=18136224
scaffold00001 . mRNA 4058460 4062210 . + . ID=PAC:18136225;Name=orange1.1g022797m;PACid=18136225;Parent=orange1.1g015632m.g
scaffold00001 . five_prime_UTR 4058460 4058898 . + . Parent=PAC:18136225;PACid=18136225
scaffold00001 . five_prime_UTR 4059172 4059330 . + . Parent=PAC:18136225;PACid=18136225
scaffold00001 . five_prime_UTR 4059422 4059873 . + . Parent=PAC:18136225;PACid=18136225
scaffold00001 . CDS 4059874 4060062 . + 0 Parent=PAC:18136225;PACid=18136225
scaffold00001 . CDS 4060285 4060359 . + 0 Parent=PAC:18136225;PACid=18136225
scaffold00001 . CDS 4060480 4060536 . + 0 Parent=PAC:18136225;PACid=18136225
scaffold00001 . CDS 4060625 4060765 . + 0 Parent=PAC:18136225;PACid=18136225
scaffold00001 . CDS 4060857 4060907 . + 0 Parent=PAC:18136225;PACid=18136225
scaffold00001 . CDS 4061250 4061345 . + 0 Parent=PAC:18136225;PACid=18136225
scaffold00001 . CDS 4061417 4061500 . + 0 Parent=PAC:18136225;PACid=18136225
scaffold00001 . CDS 4061617 4061719 . + 0 Parent=PAC:18136225;PACid=18136225
scaffold00001 . CDS 4061823 4061905 . + 2 Parent=PAC:18136225;PACid=18136225
scaffold00001 . three_prime_UTR 4061906 4062210 . + . ID="some_special_cds";Parent=PAC:18136225;PACid=18136225;Derives_from=PAC:18136217;Ontology_term=GO:000001,GO:00002;Target=scaffold00001 120 320 -
Loading

0 comments on commit 7f7acac

Please sign in to comment.