[med-svn] [Git][med-team/python-bcbio-gff][upstream] New upstream version 0.7.0
Étienne Mollier (@emollier)
gitlab at salsa.debian.org
Wed Jul 12 21:16:09 BST 2023
Étienne Mollier pushed to branch upstream at Debian Med / python-bcbio-gff
Commits:
112984ef by Étienne Mollier at 2023-07-12T21:56:46+02:00
New upstream version 0.7.0
- - - - -
30 changed files:
- BCBio/GFF/GFFParser.py
- BCBio/GFF/__init__.py
- PKG-INFO
- − Scripts/gff/access_gff_index.py
- − Scripts/gff/genbank_to_gff.py
- − Scripts/gff/gff2_to_gff3.py
- − Scripts/gff/gff_to_biosql.py
- − Scripts/gff/gff_to_genbank.py
- − Tests/GFF/F3-unique-3.v2.gff
- − Tests/GFF/c_elegans_WS199_ann_gff.txt
- − Tests/GFF/c_elegans_WS199_dna_shortened.fa
- − Tests/GFF/c_elegans_WS199_shortened_gff.txt
- − Tests/GFF/ensembl_gtf.txt
- − Tests/GFF/glimmer_nokeyval.gff3
- − Tests/GFF/hybrid1.gff3
- − Tests/GFF/hybrid2.fa
- − Tests/GFF/hybrid2.gff3
- − Tests/GFF/jgi_gff2.txt
- − Tests/GFF/mouse_extra_comma.gff3
- − Tests/GFF/ncbi_gff3.txt
- − Tests/GFF/problem_sequence_region.gff3
- − Tests/GFF/spaces.gff3
- − Tests/GFF/trans_splicing.gff3
- − Tests/GFF/transcripts.gff3
- − Tests/GFF/unescaped-semicolon.gff3
- − Tests/GFF/wormbase_gff2.txt
- − Tests/GFF/wormbase_gff2_alt.txt
- − Tests/test_GFFSeqIOFeatureAdder.py
- bcbio_gff.egg-info/PKG-INFO
- bcbio_gff.egg-info/SOURCES.txt
Changes:
=====================================
BCBio/GFF/GFFParser.py
=====================================
@@ -31,7 +31,15 @@ except AttributeError:
import _utils
collections.defaultdict = _utils.defaultdict
-from Bio.Seq import UnknownSeq
+unknown_seq_avail = False
+try:
+ from Bio.Seq import UnknownSeq
+ unknown_seq_avail = True
+except ImportError:
+ # Starting with biopython 1.81, UnknownSeq has been removed
+ from Bio.Seq import _UndefinedSequenceData
+ from Bio.Seq import Seq
+
from Bio.SeqRecord import SeqRecord
from Bio import SeqFeature
from Bio import SeqIO
@@ -69,7 +77,7 @@ def _gff_line_map(line, params):
GFF3 has key value pairs like:
count=9;gene=amx-2;sequence=SAGE:aacggagccg
- GFF2 and GTF have:
+ GFF2 and GTF have:
Sequence "Y74C9A" ; Note "Clone Y74C9A; Genbank AC024206"
name "fgenesh1_pg.C_chr_1000003"; transcriptId 869
"""
@@ -170,7 +178,7 @@ def _gff_line_map(line, params):
should_do = True
if params.limit_info:
for limit_name, limit_values in params.limit_info.items():
- cur_id = tuple([parts[i] for i in
+ cur_id = tuple([parts[i] for i in
params.filter_info[limit_name]])
if cur_id not in limit_values:
should_do = False
@@ -286,7 +294,7 @@ class _AbstractMapReduceGFF:
information.
"""
def __init__(self, create_missing=True):
- """Initialize GFF parser
+ """Initialize GFF parser
create_missing - If True, create blank records for GFF ids not in
the base_dict. If False, an error will be raised.
@@ -305,7 +313,7 @@ class _AbstractMapReduceGFF:
limit_info - A dictionary specifying the regions of the GFF file
which should be extracted. This allows only relevant portions of a file
to be parsed.
-
+
base_dict - A base dictionary of SeqRecord objects which may be
pre-populated with sequences and other features. The new features from
the GFF file will be added to this dictionary.
@@ -536,11 +544,16 @@ class _AbstractMapReduceGFF:
if match_id:
cur_rec = base[match_id]
# update generated unknown sequences with the expected maximum length
- if isinstance(cur_rec.seq, UnknownSeq):
+ if unknown_seq_avail and isinstance(cur_rec.seq, UnknownSeq):
cur_rec.seq._length = max([max_loc, cur_rec.seq._length])
+ elif not unknown_seq_avail and isinstance(cur_rec.seq._data, _UndefinedSequenceData):
+ cur_rec.seq._data._length = max([max_loc, cur_rec.seq._data._length])
return cur_rec, base
elif self._create_missing:
- new_rec = SeqRecord(UnknownSeq(max_loc), info_dict['rec_id'])
+ if unknown_seq_avail:
+ new_rec = SeqRecord(UnknownSeq(max_loc), info_dict['rec_id'])
+ else:
+ new_rec = SeqRecord(Seq(None, length=max_loc), info_dict['rec_id'])
base[info_dict['rec_id']] = new_rec
return new_rec, base
else:
@@ -654,7 +667,7 @@ class GFFParser(_AbstractMapReduceGFF):
def __init__(self, line_adjust_fn=None, create_missing=True):
_AbstractMapReduceGFF.__init__(self, create_missing=create_missing)
self._line_adjust_fn = line_adjust_fn
-
+
def _gff_process(self, gff_files, limit_info, target_lines):
"""Process GFF addition without any parallelization.
@@ -704,7 +717,7 @@ class GFFParser(_AbstractMapReduceGFF):
yield out_info.get_results()
out_info = _GFFParserLocalOut((target_lines is not None and
target_lines > 1))
- if (results and results[0][0] == 'directive' and
+ if (results and results[0][0] == 'directive' and
results[0][1] == 'FASTA'):
found_seqs = True
break
@@ -741,7 +754,7 @@ class DiscoGFFParser(_AbstractMapReduceGFF):
"""
def __init__(self, disco_host, create_missing=True):
"""Initialize parser.
-
+
disco_host - Web reference to a Disco host which will be used for
parallelizing the GFF reading job.
"""
@@ -755,7 +768,7 @@ class DiscoGFFParser(_AbstractMapReduceGFF):
# make these imports local; only need them when using disco
import simplejson
import disco
- # absolute path names unless they are special disco files
+ # absolute path names unless they are special disco files
full_files = []
for f in gff_files:
if f.split(":")[0] != "disco":
@@ -829,7 +842,7 @@ class GFFExaminer:
def __init__(self):
self._filter_info = dict(gff_id = [0], gff_source_type = [1, 2],
gff_source = [1], gff_type = [2])
-
+
def _get_local_params(self, limit_info=None):
class _LocalParams:
def __init__(self):
@@ -838,13 +851,13 @@ class GFFExaminer:
params.limit_info = limit_info
params.filter_info = self._filter_info
return params
-
+
@_file_or_handle
def available_limits(self, gff_handle):
"""Return dictionary information on possible limits for this file.
This returns a nested dictionary with the following structure:
-
+
keys -- names of items to filter by
values -- dictionary with:
keys -- filter choice
@@ -884,7 +897,7 @@ class GFFExaminer:
keys -- tuple of (source, type) for each parent
values -- tuple of (source, type) as children of that parent
-
+
Not a parallelized map-reduce implementation.
"""
# collect all of the parent and child types mapped to IDs
=====================================
BCBio/GFF/__init__.py
=====================================
@@ -3,4 +3,4 @@
from BCBio.GFF.GFFParser import GFFParser, DiscoGFFParser, GFFExaminer, parse, parse_simple
from BCBio.GFF.GFFOutput import GFF3Writer, write
-__version__ = "0.6.9"
+__version__ = "0.7.0"
=====================================
PKG-INFO
=====================================
@@ -1,10 +1,9 @@
-Metadata-Version: 1.0
+Metadata-Version: 2.1
Name: bcbio-gff
-Version: 0.6.9
+Version: 0.7.0
Summary: Read and write Generic Feature Format (GFF) with Biopython integration.
Home-page: https://github.com/chapmanb/bcbb/tree/master/gff
Author: Brad Chapman
Author-email: chapmanb at 50mail.com
License: Biopython License
-Description: UNKNOWN
-Platform: UNKNOWN
+License-File: LICENSE
=====================================
Scripts/gff/access_gff_index.py deleted
=====================================
@@ -1,98 +0,0 @@
-"""Access an GFF file using bx-python's interval indexing.
-
-Requires:
- bx-python: http://bitbucket.org/james_taylor/bx-python/wiki/Home
- gff library: http://github.com/chapmanb/bcbb/tree/master/gff
-
-Index time:
- 44 Mb file
- 11 seconds
- Index is 7.5Mb
-"""
-from __future__ import with_statement
-import os
-import sys
-
-from bx import interval_index_file
-
-from BCBio import GFF
-
-def main(gff_file):
- gff_index = gff_file + ".index"
- if not os.path.exists(gff_index):
- print "Indexing GFF file"
- index(gff_file)
- index = GFFIndexedAccess(gff_file, keep_open=True)
- print index.seqids
- print
- for feature in index.get_features_in_region("Chr2", 17500, 20000):
- print feature
- for feature in index.get_features_in_region("Chr5", 500000, 502500):
- print feature
-
- exam = GFF.GFFExaminer()
- #print exam.available_limits(gff_file)
- #print exam.parent_child_map(gff_file)
-
- found = 0
- limit_info = dict(
- gff_type = ["protein", "gene", "mRNA", "exon", "CDS", "five_prime_UTR",
- "three_prime_UTR"]
- )
- for feature in index.get_features_in_region("Chr1", 0, 50000,
- limit_info):
- found += 1
- print found
-
-class GFFIndexedAccess(interval_index_file.AbstractIndexedAccess):
- """Provide indexed access to a GFF file.
- """
- def __init__(self, *args, **kwargs):
- interval_index_file.AbstractIndexedAccess.__init__(self, *args,
- **kwargs)
- self._parser = GFF.GFFParser()
-
- @property
- def seqids(self):
- return self.indexes.indexes.keys()
-
- def get_features_in_region(self, seqid, start, end, limit_info=None):
- """Retrieve features located on a given region in start/end coordinates.
- """
- limit_info = self._parser._normalize_limit_info(limit_info)
- line_gen = self.get_as_iterator(seqid, int(start), int(end))
- recs = None
- for results in self._parser._lines_to_out_info(line_gen, limit_info):
- assert not recs, "Unexpected multiple results"
- recs = self._parser._results_to_features(dict(), results)
- if recs is None:
- return []
- else:
- assert len(recs) == 1
- rec = recs[seqid]
- return rec.features
-
- def read_at_current_offset(self, handle, **kwargs):
- line = handle.readline()
- return line
-
-def index(gff_file, index_file=None):
- index = interval_index_file.Indexes()
- with open(gff_file) as in_handle:
- while 1:
- pos = in_handle.tell()
- line = in_handle.readline()
- if not line:
- break
- if not line.startswith("#"):
- parts = line.split("\t")
- (seqid, gtype, source, start, end) = parts[:5]
- index.add(seqid, int(start), int(end), pos)
- if index_file is None:
- index_file = gff_file + ".index"
- with open(index_file, "w") as index_handle:
- index.write(index_handle)
- return index_file
-
-if __name__ == "__main__":
- main(*sys.argv[1:])
=====================================
Scripts/gff/genbank_to_gff.py deleted
=====================================
@@ -1,21 +0,0 @@
-#!/usr/bin/env python
-"""Convert a GenBank file into GFF format.
-
-Usage:
- genbank_to_gff.py <genbank_file>
-"""
-import sys
-import os
-
-from Bio import SeqIO
-from Bio import Seq
-
-from BCBio import GFF
-
-def main(gb_file):
- out_file = "%s.gff" % os.path.splitext(gb_file)[0]
- with open(out_file, "w") as out_handle:
- GFF.write(SeqIO.parse(gb_file, "genbank"), out_handle)
-
-if __name__ == "__main__":
- main(*sys.argv[1:])
=====================================
Scripts/gff/gff2_to_gff3.py deleted
=====================================
@@ -1,30 +0,0 @@
-#!/usr/bin/env python
-"""Convert a GFF2 file to an updated GFF3 format file.
-
-Usage:
- gff2_to_gff3.py <in_gff2_file>
-
-The output file has the same name with the extension gff3.
-"""
-import sys
-import os
-
-from BCBio.GFF import GFFParser, GFF3Writer
-
-def main(in_file):
- base, ext = os.path.splitext(in_file)
- out_file = "%s.gff3" % (base)
- in_handle = open(in_file)
- out_handle = open(out_file, "w")
- reader = GFFParser()
- writer = GFF3Writer()
- writer.write(reader.parse_in_parts(in_handle, target_lines=25000),
- out_handle)
- in_handle.close()
- out_handle.close()
-
-if __name__ == "__main__":
- if len(sys.argv) != 2:
- print __doc__
- sys.exit()
- main(sys.argv[1])
=====================================
Scripts/gff/gff_to_biosql.py deleted
=====================================
@@ -1,76 +0,0 @@
-#!/usr/bin/env python
-"""Load a fasta file of sequences and associated GFF file into BioSQL.
-
-You will need to adjust the database parameters and have a BioSQL database set
-up. See:
-
-http://biopython.org/wiki/BioSQL
-
-Depending on the size of the sequences being loaded, you may also get errors on
-loading very large chromosome sequences. Updating these options can help:
-
- set global max_allowed_packet=1000000000;
- set global net_buffer_length=1000000;
-
-Usage:
- gff_to_biosql.py <fasta file> <gff file>
-"""
-from __future__ import with_statement
-import sys
-
-from BioSQL import BioSeqDatabase
-from Bio import SeqIO
-
-from BCBio.GFF import GFFParser
-
-def main(seq_file, gff_file):
- # -- To be customized
- # You need to update these parameters to point to your local database
- # XXX demo example could be swapped to use SQLite when that is integrated
- user = "chapmanb"
- passwd = "cdev"
- host = "localhost"
- db_name = "wb199_gff"
- biodb_name = "wb199_gff_cds_pcr"
- # These need to be updated to reflect what you would like to parse
- # out of the GFF file. Set limit_info=None to parse everything, but
- # be sure the file is small or you may deal with memory issues.
- rnai_types = [('Orfeome', 'PCR_product'),
- ('GenePair_STS', 'PCR_product'),
- ('Promoterome', 'PCR_product')]
- gene_types = [('Non_coding_transcript', 'gene'),
- ('Coding_transcript', 'gene'),
- ('Coding_transcript', 'mRNA'),
- ('Coding_transcript', 'CDS')]
- limit_info = dict(gff_source_type = rnai_types + gene_types)
- # --
- print "Parsing FASTA sequence file..."
- with open(seq_file) as seq_handle:
- seq_dict = SeqIO.to_dict(SeqIO.parse(seq_handle, "fasta"))
-
- print "Parsing GFF data file..."
- parser = GFFParser()
- recs = parser.parse(gff_file, seq_dict, limit_info=limit_info)
-
- print "Writing to BioSQL database..."
- server = BioSeqDatabase.open_database(driver="MySQLdb", user=user,
- passwd=passwd, host=host, db=db_name)
- try:
- if biodb_name not in server.keys():
- server.new_database(biodb_name)
- else:
- server.remove_database(biodb_name)
- server.adaptor.commit()
- server.new_database(biodb_name)
- db = server[biodb_name]
- db.load(recs)
- server.adaptor.commit()
- except:
- server.adaptor.rollback()
- raise
-
-if __name__ == "__main__":
- if len(sys.argv) != 3:
- print __doc__
- sys.exit()
- main(sys.argv[1], sys.argv[2])
=====================================
Scripts/gff/gff_to_genbank.py deleted
=====================================
@@ -1,73 +0,0 @@
-#!/usr/bin/env python
-"""Convert a GFF and associated FASTA file into GenBank format.
-
-Usage:
- gff_to_genbank.py <GFF annotation file> [<FASTA sequence file> <molecule type>]
-
- FASTA sequence file: input sequences matching records in GFF. Optional if sequences
- are in the GFF
- molecule type: type of molecule in the GFF file. Defaults to DNA, the most common case.
-"""
-from __future__ import print_function
-
-import sys
-import os
-
-from Bio import SeqIO
-
-from BCBio import GFF
-
-
-def main(gff_file, fasta_file=None, molecule_type="DNA"):
- out_file = "%s.gb" % os.path.splitext(gff_file)[0]
- if fasta_file:
- fasta_input = SeqIO.to_dict(SeqIO.parse(fasta_file, "fasta"))
- else:
- fasta_input = {}
- gff_iter = GFF.parse(gff_file, fasta_input)
- SeqIO.write(_check_gff(_fix_ncbi_id(gff_iter), molecule_type), out_file, "genbank")
-
-
-def _fix_ncbi_id(fasta_iter):
- """GenBank identifiers can only be 16 characters; try to shorten NCBI.
- """
- for rec in fasta_iter:
- if len(rec.name) > 16 and rec.name.find("|") > 0:
- new_id = [x for x in rec.name.split("|") if x][-1]
- print("Warning: shortening NCBI name %s to %s" % (rec.id, new_id))
- rec.id = new_id
- rec.name = new_id
- yield rec
-
-
-def _check_gff(gff_iterator, molecule_type):
- """Check GFF files before feeding to SeqIO to be sure they have sequences.
- """
- for rec in gff_iterator:
- if "molecule_type" not in rec.annotations:
- rec.annotations["molecule_type"] = molecule_type
- yield _flatten_features(rec)
-
-
-def _flatten_features(rec):
- """Make sub_features in an input rec flat for output.
-
- GenBank does not handle nested features, so we want to make
- everything top level.
- """
- out = []
- for f in rec.features:
- cur = [f]
- while len(cur) > 0:
- nextf = []
- for curf in cur:
- out.append(curf)
- if len(curf.sub_features) > 0:
- nextf.extend(curf.sub_features)
- cur = nextf
- rec.features = out
- return rec
-
-
-if __name__ == "__main__":
- main(*sys.argv[1:])
=====================================
Tests/GFF/F3-unique-3.v2.gff deleted
=====================================
@@ -1,128 +0,0 @@
-##solid-gff-version 0.2
-##gff-version 2
-##source-version MaToGff.java v1.5
-##date 2008-05-28
-##time 13:11:03
-##Type solid_read
-##color-code AA=0,AC=1,AG=2,AT=3,CA=1,CC=0,CG=3,CT=2,GA=2,GC=3,GG=0,GT=1,TA=3,TC=2,TG=1,TT=0
-##primer-base F3=T
-##max-num-mismatches 3
-##max-read-length 20
-##line-order fragment
-##history filter_fasta.pl --noduplicates --output=/data/results/DAEMON/DAEMON_MATE_PAIRS_2_20070326/S1/results.01/primary.20071218094706805 --name=DAEMON_MATE_PAIRS_2_20070326_S1 --tag=F3 --minlength=20 --prefix=T /data/results/DAEMON/DAEMON_MATE_PAIRS_2_20070326/S1/jobs/postPrimerSetPrimary.117/rawseq
-##history map /data/results/RegressionDriver/CaseManager/results/r12/integration/case0002/reads1/test_S1_F3.csfasta /data/results/RegressionDriver/CaseManager/knownData/validatedReference/matchingPipeline/ecoli_k12_MG1655.fasta T=30 L=19 C=1 E=.Tmpfile1211939575SVhDtd F=0 B=1 D=1 u=1 r=0 n=1 Z=1000 P="0000000111111111111" M=0 U=0.000000 H=0 > .Tmpfile1211939575SVhDtd.out.1
-##history MaToGff.java --sort --qvs=test_S1_F3_QV.qual.txt --convert=unique --clear=3 --tempdir=../tmp test_S1_F3.csfasta.ma.20.3
-##hdr seqname source feature start end score strand frame [attributes] [comments]
-3_336_815_F3 solid read 55409 55428 10.4 + . g=A3233312322232122211;i=1;p=1.000;q=23,12,18,17,10,24,19,14,27,9,23,9,16,20,11,7,8,4,4,14;u=0,0,0,1
-3_142_1011_F3 solid read 91290 91309 5.0 - . g=T0330222333132222222;i=1;p=1.000;q=4,4,14,4,4,4,4,21,4,4,4,4,25,4,4,4,5,21,4,4;u=0,0,0,1
-3_341_424_F3 solid read 102717 102736 10.6 - . g=T2203031313223113212;i=1;p=1.000;q=9,27,25,16,18,9,27,26,23,13,14,25,27,5,24,5,26,26,4,5;u=0,0,1
-3_6_37_F3 solid read 181053 181072 9.4 + . g=C3220221332111020310;i=1;p=1.000;q=9,5,13,9,10,22,6,12,21,7,13,4,21,16,23,6,20,20,13,6;u=0,0,0,1
-3_34_202_F3 solid read 284207 284226 6.9 + . g=G0301333332232122333;i=1;p=1.000;q=6,15,21,8,12,4,4,5,12,8,4,12,4,7,10,6,8,16,4,6;u=0,1
-3_277_712_F3 solid read 304136 304155 11.8 - . g=A2033101122223322133;i=1;p=1.000;q=26,11,14,27,4,17,4,26,26,23,17,25,26,27,21,23,5,20,26,23;u=0,1
-3_394_71_F3 solid read 308736 308755 10.8 + . g=T3203322323203312331;i=1;p=1.000;q=9,24,19,15,20,18,20,10,13,13,11,21,12,7,4,11,20,24,4,25;u=0,1
-3_285_1497_F3 solid read 404055 404074 8.4 - . g=T1221231003202232221;i=1;p=1.000;q=8,10,6,25,16,14,23,27,8,14,21,19,5,4,4,6,22,12,4,6;u=0,0,0,1
-3_228_178_F3 solid read 453227 453246 9.5 - . g=G1130333332331110323;i=1;p=1.000;q=4,19,25,18,18,5,19,6,8,24,4,26,21,11,15,4,26,13,13,15;u=0,0,0,1
-3_406_794_F3 solid read 504835 504854 8.3 - . g=T3033331301320201111;i=1;p=1.000;q=27,4,13,4,21,11,7,11,5,26,10,8,9,4,6,18,9,26,17,6;u=0,0,0,1
-3_303_251_F3 solid read 561501 561520 5.3 + . g=C0011111112222112221;i=1;p=1.000;q=9,8,4,4,10,4,4,4,6,14,4,4,4,4,16,4,4,4,4,23;u=0,0,1
-3_152_112_F3 solid read 624012 624031 7.7 - . g=G0301122312213122221;i=1;p=1.000;q=22,14,7,13,18,5,11,4,15,6,6,11,4,8,15,5,10,4,6,24;u=0,0,0,1
-3_112_1154_F3 solid read 630582 630601 11.3 - . g=T1333312011131131011;i=1;p=1.000;q=27,27,4,5,17,24,20,19,7,4,25,17,18,15,22,23,17,25,16,26;u=0,0,1
-3_196_392_F3 solid read 661664 661683 19.7 - . g=T3321013301122133323;i=1;p=1.000;q=27,25,13,26,21,25,23,27,27,27,27,11,16,27,27,19,26,27,26,27;u=1
-3_192_1248_F3 solid read 672037 672056 4.5 - . g=A0333232333121222222;i=1;p=1.000;q=4,7,4,4,4,4,4,4,6,4,4,4,4,4,7,7,4,4,6,4;u=0,0,0,1
-3_63_479_F3 solid read 742582 742601 7.9 - . g=A0133333333233232332;i=1;p=1.000;q=4,9,6,11,20,12,11,9,13,20,18,4,4,14,9,15,4,6,21,4;u=0,0,0,1
-3_30_710_F3 solid read 816069 816088 9.2 - . g=T3311001223313333313;i=1;p=1.000;q=22,27,18,25,25,7,26,25,14,23,6,25,5,11,7,4,15,7,4,6;u=0,0,0,1
-3_284_77_F3 solid read 864876 864895 7.4 + . g=T2003133033233112331;i=1;p=1.000;q=13,19,4,11,22,24,6,16,4,6,13,4,12,18,4,6,7,11,4,5;u=0,0,0,1
-3_411_1040_F3 solid read 876023 876042 10.9 - . g=T2121301233200033221;i=1;p=1.000;q=9,9,5,12,11,8,4,16,27,27,18,21,24,9,18,24,21,9,23,17;u=0,0,0,1
-3_188_171_F3 solid read 884683 884702 5.8 - . g=A1322330132213322231;i=1;p=1.000;q=4,8,4,5,7,6,5,4,11,6,6,11,4,8,4,8,4,6,4,15;u=0,0,0,1
-3_63_787_F3 solid read 1022149 1022168 7.5 + . g=C3131132013020123031;i=1;p=1.000;q=12,13,26,14,9,9,13,14,4,7,8,5,11,4,17,4,4,6,4,21;u=0,1
-3_391_2015_F3 solid read 1074989 1075008 18.5 - . g=A2323101222321232322;i=1;p=1.000;q=27,25,18,20,27,27,24,23,27,23,27,25,19,26,12,26,9,21,27,21;u=1
-3_8_425_F3 solid read 1119124 1119143 6.7 - . g=T0321201132230303323;i=1;p=1.000;q=6,5,8,6,4,4,23,9,12,10,15,4,13,13,8,4,4,5,5,12;u=0,0,1
-3_53_745_F3 solid read 1130179 1130198 7.6 - . g=C0213313233333113321;i=1;p=1.000;q=27,6,9,22,18,9,8,15,6,8,14,5,8,6,16,4,5,4,4,14;u=0,0,0,1
-3_123_576_F3 solid read 1219122 1219141 8.7 + . g=A3333133323333323323;i=1;p=1.000;q=18,22,5,11,16,16,8,14,8,5,19,8,9,10,7,11,6,11,9,4;u=0,0,1
-3_81_12_F3 solid read 1236732 1236751 8.6 + . g=G2210332302233112321;i=1;p=1.000;q=7,16,17,9,7,9,9,16,9,4,10,21,17,8,4,6,9,16,6,12;u=0,0,0,1
-3_96_1862_F3 solid read 1264409 1264428 6.9 - . g=G0301032323231222021;i=1;p=1.000;q=26,23,11,20,15,8,6,4,6,6,9,7,6,4,8,6,4,5,6,5;u=0,0,0,1
-3_40_136_F3 solid read 1266177 1266196 7.4 - . g=T2332222332203312221;i=1;p=1.000;q=9,23,6,19,13,9,4,8,17,9,4,4,13,9,8,5,4,6,10,8;u=0,0,1
-3_124_1781_F3 solid read 1385416 1385435 10.3 + . g=A1322302333332222132;i=1;p=1.000;q=13,17,8,6,5,9,24,4,7,9,18,27,18,16,16,23,18,18,11,23;u=0,0,1
-3_134_1165_F3 solid read 1393169 1393188 9.0 - . g=T3301123202321131311;i=1;p=1.000;q=4,27,18,7,27,4,27,26,4,20,4,27,26,9,27,4,27,14,10,27;u=1
-3_224_587_F3 solid read 1490044 1490063 6.1 + . g=G2032313231111233321;i=1;p=1.000;q=4,4,6,6,13,24,4,4,5,15,6,7,9,14,4,4,4,25,5,5;u=0,0,0,1
-3_25_747_F3 solid read 1513598 1513617 9.5 + . g=T1223213101133121231;i=1;p=1.000;q=26,27,8,27,27,27,26,27,26,19,8,14,4,17,11,5,7,4,7,6;u=0,0,1
-3_143_14_F3 solid read 1528236 1528255 9.7 + . g=T3233113323230202011;i=1;p=1.000;q=13,23,17,19,23,16,24,25,14,15,9,6,4,11,4,9,12,4,16,10;u=0,0,0,1
-3_164_1025_F3 solid read 1570107 1570126 7.9 - . g=T3220332323303320231;i=1;p=1.000;q=7,10,20,8,4,24,4,4,21,6,26,22,9,6,11,9,6,4,17,14;u=0,0,0,1
-3_137_552_F3 solid read 1630276 1630295 9.1 - . g=G3030333223233102131;i=1;p=1.000;q=6,28,9,4,6,26,27,6,10,9,27,21,6,16,9,25,6,7,23,12;u=0,0,0,1
-3_125_1810_F3 solid read 1634104 1634123 10.5 + . g=G1232220322032311332;i=1;p=1.000;q=27,8,26,26,10,6,26,12,27,27,26,4,27,27,23,8,8,4,27,12;u=0,0,0,1
-3_314_1310_F3 solid read 1639981 1640000 9.2 + . g=A2221332230322203033;i=1;p=1.000;q=19,12,6,27,11,27,6,11,5,6,9,13,27,27,8,18,5,22,4,27;u=0,0,0,1
-3_384_591_F3 solid read 1654341 1654360 6.8 + . g=A3323221133121102313;i=1;p=1.000;q=19,8,7,7,15,4,20,7,4,6,14,7,19,6,8,4,5,9,4,4;u=0,0,0,1
-3_145_739_F3 solid read 1791040 1791059 11.9 - . g=A0221223333323131212;i=1;p=1.000;q=20,27,23,13,27,14,27,28,27,25,12,24,8,16,8,4,8,21,9,11;u=0,0,0,1
-3_326_2020_F3 solid read 1830564 1830583 9.3 + . g=A3321322331103233322;i=1;p=1.000;q=14,4,25,16,10,12,16,5,14,10,25,5,25,5,9,18,13,26,4,26;u=0,0,0,1
-3_233_1265_F3 solid read 1857564 1857583 8.9 + . g=T3112113020130223311;i=1;p=1.000;q=7,27,25,26,27,14,26,27,27,27,4,6,5,10,17,4,5,7,6,12;u=0,0,1
-3_235_100_F3 solid read 1912460 1912479 9.6 - . g=G2233020000132311231;i=1;p=1.000;q=23,24,25,16,17,6,21,25,9,4,6,11,8,19,6,6,19,14,13,6;u=0,0,0,1
-3_111_107_F3 solid read 1944496 1944515 7.6 - . g=C3023223333211322231;i=1;p=1.000;q=15,5,6,14,5,13,4,12,11,4,9,9,11,12,4,11,11,13,6,6;u=0,0,0,1
-3_457_1514_F3 solid read 1956598 1956617 9.9 - . g=T0013331013332110221;i=1;p=1.000;q=18,24,10,24,23,25,22,11,20,10,15,11,4,5,27,4,9,13,5,27;u=0,1
-3_183_74_F3 solid read 1992040 1992059 9.8 + . g=C3332233131131222322;i=1;p=1.000;q=27,27,25,23,25,8,11,11,7,11,4,12,14,10,15,7,14,4,9,12;u=0,0,1
-3_357_1303_F3 solid read 2037917 2037936 10.9 - . g=T3331331323320311331;i=1;p=1.000;q=7,27,5,19,26,8,27,12,14,27,8,27,23,9,19,4,26,20,9,27;u=0,0,0,1
-3_153_186_F3 solid read 2083441 2083460 6.7 + . g=T3112233331133323322;i=1;p=1.000;q=7,14,19,7,12,6,11,4,11,8,4,6,6,4,11,4,6,4,4,18;u=0,1
-3_65_1741_F3 solid read 2107441 2107460 8.4 + . g=T3333332330233132123;i=1;p=1.000;q=4,4,6,25,9,4,26,16,21,9,18,15,27,27,4,21,9,7,9,6;u=0,0,0,1
-3_98_323_F3 solid read 2118821 2118840 7.5 + . g=A3222212322131112031;i=1;p=1.000;q=13,14,8,10,8,14,4,13,10,7,15,4,6,4,4,12,6,11,6,8;u=0,0,1
-3_48_258_F3 solid read 2153882 2153901 9.4 - . g=G0330113313201122321;i=1;p=1.000;q=22,15,20,4,16,17,14,24,4,5,4,22,19,8,10,9,13,22,8,15;u=0,0,0,1
-3_140_1125_F3 solid read 2182909 2182928 7.9 + . g=T3231331302232001131;i=1;p=1.000;q=10,4,12,6,4,12,13,6,18,5,8,11,4,26,6,25,5,18,11,12;u=0,0,0,1
-3_359_118_F3 solid read 2188393 2188412 8.4 + . g=A0301311133331131322;i=1;p=1.000;q=11,5,7,13,20,6,6,25,8,18,9,15,27,9,6,7,15,17,4,4;u=0,0,0,1
-3_203_483_F3 solid read 2272874 2272893 9.1 - . g=C3031223110333133311;i=1;p=1.000;q=23,21,25,27,10,5,22,15,17,18,5,18,17,5,19,4,4,13,4,22;u=0,0,0,1
-3_66_301_F3 solid read 2286038 2286057 6.6 - . g=C1113113330132222311;i=1;p=1.000;q=10,4,6,4,8,13,9,4,10,9,4,6,13,9,5,6,11,6,4,9;u=0,0,0,1
-3_78_130_F3 solid read 2291021 2291040 7.6 + . g=G3233131332212222321;i=1;p=1.000;q=13,16,6,12,17,11,10,4,12,8,13,4,8,6,4,4,12,10,4,11;u=0,0,0,1
-3_141_110_F3 solid read 2291354 2291373 9.3 + . g=T1312203322212123321;i=1;p=1.000;q=9,21,24,11,16,4,23,27,16,16,8,22,6,10,16,4,9,4,7,25;u=0,0,1
-3_51_1383_F3 solid read 2374918 2374937 8.8 + . g=T3311203033322222231;i=1;p=1.000;q=24,26,6,27,27,23,27,4,21,27,4,27,6,9,24,4,23,4,4,27;u=0,0,1
-3_231_366_F3 solid read 2392091 2392110 10.0 - . g=T2022333223101331322;i=1;p=1.000;q=18,12,9,9,13,8,7,22,7,7,4,26,12,17,9,20,24,8,18,14;u=0,0,0,1
-3_214_1802_F3 solid read 2394604 2394623 8.8 - . g=T1232111001220211133;i=1;p=1.000;q=17,18,14,6,19,4,21,4,6,12,11,4,26,20,9,18,7,16,5,18;u=0,0,0,1
-3_67_1434_F3 solid read 2454508 2454527 15.2 - . g=T3121311232222231203;i=1;p=1.000;q=9,27,27,18,16,14,25,27,26,21,19,27,27,27,15,5,24,27,24,24;u=0,0,1
-3_124_1647_F3 solid read 2493617 2493636 7.5 + . g=A0211320203220231332;i=1;p=1.000;q=9,12,12,9,6,14,12,7,4,4,12,9,4,9,16,4,4,9,9,16;u=0,0,0,1
-3_39_328_F3 solid read 2500759 2500778 7.8 + . g=T1332333033231132333;i=1;p=1.000;q=24,27,26,26,25,21,7,8,4,5,20,4,11,6,8,4,6,4,11,7;u=0,0,1
-3_378_322_F3 solid read 2541624 2541643 8.9 + . g=T2333331001023011220;i=1;p=1.000;q=14,6,13,25,27,4,24,22,14,19,9,23,15,6,8,4,22,4,4,20;u=0,0,0,1
-3_216_848_F3 solid read 2550573 2550592 11.5 - . g=G2320322020031220322;i=1;p=1.000;q=21,24,8,21,20,25,18,6,24,14,21,9,7,18,8,18,7,9,19,12;u=0,0,0,1
-3_221_516_F3 solid read 2607559 2607578 11.1 - . g=T2132333313222333332;i=1;p=1.000;q=9,19,27,26,24,26,26,25,25,26,21,4,6,10,21,6,20,13,5,24;u=0,0,0,1
-3_56_45_F3 solid read 2662103 2662122 5.5 + . g=G3021122332232122321;i=1;p=1.000;q=4,4,4,6,4,6,4,5,18,9,4,16,10,4,4,4,12,4,6,6;u=0,0,0,1
-3_127_210_F3 solid read 2798906 2798925 10.2 + . g=G2331321333232203222;i=1;p=1.000;q=11,25,9,4,23,16,26,14,7,22,9,25,9,8,21,8,15,17,4,26;u=0,0,1
-3_417_422_F3 solid read 2812322 2812341 8.8 - . g=T3321222333313333132;i=1;p=1.000;q=9,26,7,19,7,13,23,4,25,4,6,19,4,16,15,15,23,4,19,13;u=0,0,0,1
-3_42_1403_F3 solid read 2830264 2830283 9.6 - . g=T3212330132120221212;i=1;p=1.000;q=7,4,25,18,6,17,12,12,17,14,8,26,13,15,10,4,21,5,12,22;u=0,1
-3_457_42_F3 solid read 2874245 2874264 7.6 - . g=G0301123332223122221;i=1;p=1.000;q=18,10,14,9,19,4,10,8,11,10,6,8,5,8,11,4,13,6,4,6;u=0,0,1
-3_361_728_F3 solid read 2893879 2893898 14.6 + . g=C3213223312310132221;i=1;p=1.000;q=14,18,7,7,17,19,23,24,17,26,12,15,21,23,21,19,17,20,22,24;u=0,0,0,1
-3_77_718_F3 solid read 2913092 2913111 9.4 + . g=T3021331333313131231;i=1;p=1.000;q=15,26,7,24,20,18,5,6,17,18,6,11,4,13,19,15,7,4,22,25;u=0,0,0,1
-3_116_154_F3 solid read 2917672 2917691 9.8 - . g=A0323231223233132311;i=1;p=1.000;q=20,9,19,18,10,18,8,16,25,6,18,6,12,24,6,7,5,15,7,17;u=0,0,0,1
-3_239_1415_F3 solid read 2923256 2923275 19.2 + . g=T3233113121300032200;i=1;p=1.000;q=25,27,27,26,27,24,27,27,25,27,22,27,21,26,22,19,26,9,14,21;u=1
-3_142_1468_F3 solid read 2930117 2930136 10.5 - . g=A3233323333303103330;i=1;p=1.000;q=9,20,6,26,16,18,8,13,20,25,25,18,6,12,11,18,4,16,16,6;u=0,0,1
-3_394_295_F3 solid read 2930118 2930137 8.1 - . g=T3023333333333311331;i=1;p=1.000;q=4,14,6,12,7,22,10,4,13,24,18,12,12,4,6,9,9,9,14,4;u=0,0,0,1
-3_222_1773_F3 solid read 2934040 2934059 11.6 + . g=T1303031311123232302;i=1;p=1.000;q=11,10,24,15,28,6,19,5,13,27,8,26,8,22,25,27,26,27,8,13;u=0,0,0,1
-3_276_1344_F3 solid read 2969950 2969969 13.2 - . g=G3211212131233322233;i=1;p=1.000;q=27,27,12,16,11,23,27,8,23,12,27,22,20,12,15,25,8,27,16,6;u=0,1
-3_155_1814_F3 solid read 3107393 3107412 13.6 + . g=A2332222213113120221;i=1;p=1.000;q=27,26,20,25,26,27,12,27,26,18,26,4,27,10,23,26,6,23,26,26;u=0,0,0,1
-3_373_2014_F3 solid read 3143956 3143975 12.0 - . g=T3013322223222221211;i=1;p=1.000;q=16,8,17,21,10,10,18,18,18,13,4,23,16,24,8,19,14,15,23,11;u=0,1
-3_81_1637_F3 solid read 3413619 3413638 9.1 + . g=G2313032322122302111;i=1;p=1.000;q=9,4,7,19,27,6,11,5,12,15,20,27,8,27,6,16,6,27,21,6;u=0,0,1
-3_291_969_F3 solid read 3438323 3438342 17.4 + . g=T0021120212032121313;i=1;p=1.000;q=24,27,6,27,27,27,27,13,27,27,25,27,26,27,27,20,23,26,27,20;u=1
-3_179_1617_F3 solid read 3475164 3475183 8.0 + . g=A2100132222332123123;i=1;p=1.000;q=21,25,11,22,4,19,7,21,20,4,5,24,25,16,4,4,11,19,4,4;u=0,0,0,1
-3_446_861_F3 solid read 3476173 3476192 11.6 - . g=G1213302212022132321;i=1;p=1.000;q=27,27,27,27,26,25,12,27,24,18,24,6,27,26,20,9,6,6,4,23;u=0,0,1
-3_397_317_F3 solid read 3545152 3545171 11.1 + . g=T3110031332233111131;i=1;p=1.000;q=22,27,9,9,26,5,22,20,9,10,16,22,24,6,23,25,22,4,17,18;u=0,0,0,1
-3_323_713_F3 solid read 3575287 3575306 16.2 - . g=A0322222200213223302;i=1;p=1.000;q=27,25,21,27,26,26,24,26,27,18,27,26,26,27,22,22,6,26,25,8;u=0,1
-3_294_1906_F3 solid read 3727542 3727561 8.4 - . g=A3030310223202311021;i=1;p=1.000;q=14,7,5,4,7,18,4,6,13,6,12,12,10,11,15,14,16,7,9,12;u=0,0,0,1
-3_443_223_F3 solid read 3730805 3730824 17.1 - . g=T1113320033330133111;i=1;p=1.000;q=28,27,18,27,27,27,20,26,27,14,25,16,19,19,8,23,16,21,16,15;u=0,0,1
-3_94_809_F3 solid read 3841898 3841917 21.8 - . g=A2032223110001131310;i=1;p=1.000;q=27,27,27,27,26,27,25,24,27,27,27,25,27,27,27,12,23,16,27,27;u=0,0,0,1
-3_245_387_F3 solid read 3878549 3878568 24.4 - . g=A0222211220333132122;i=1;p=1.000;q=27,27,26,27,26,27,27,25,27,25,26,27,18,21,26,25,26,23,24,24;u=1
-3_190_1089_F3 solid read 3900038 3900057 13.7 - . g=T1111110323122301202;i=1;p=1.000;q=27,11,27,11,8,9,27,9,9,26,25,27,11,27,23,14,24,20,22,26;u=0,0,1
-3_442_1501_F3 solid read 3912610 3912629 8.5 + . g=A0012333103302132301;i=1;p=1.000;q=11,11,15,19,15,6,12,10,4,11,21,5,9,16,7,14,4,4,8,19;u=0,0,1
-3_342_678_F3 solid read 4044575 4044594 4.0 + . g=A3333112332213322323;i=1;p=1.000;q=4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4;u=0,0,0,1
-3_56_1294_F3 solid read 4058789 4058808 12.7 + . g=G3323331232322213322;i=1;p=1.000;q=26,17,18,27,23,8,8,24,27,27,9,27,25,14,26,4,27,9,24,23;u=0,0,0,1
-3_69_1575_F3 solid read 4070467 4070486 9.9 + . g=A2222011012222112121;i=1;p=1.000;q=16,25,14,9,9,9,21,9,4,24,6,21,13,6,27,10,19,8,6,27;u=0,0,0,1
-3_198_476_F3 solid read 4080622 4080641 8.9 + . g=C2010231122212011133;i=1;p=1.000;q=16,8,8,16,12,17,4,16,12,15,10,4,9,6,4,25,9,9,23,11;u=0,1
-3_24_715_F3 solid read 4136503 4136522 4.0 - . g=G1313332132232313233;i=1;p=1.000;q=4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4;u=0,0,0,1
-3_151_283_F3 solid read 4148264 4148283 9.7 + . g=T3230210232022111220;i=1;p=1.000;q=9,14,6,25,25,19,6,4,16,11,12,20,10,13,26,19,6,4,19,14;u=0,0,1
-3_164_774_F3 solid read 4156157 4156176 9.6 + . g=G2311112210110223313;i=1;p=1.000;q=8,24,19,7,6,16,12,9,4,8,26,14,26,24,7,18,6,16,14,7;u=0,0,0,1
-3_275_1212_F3 solid read 4171385 4171404 8.3 + . g=G0223122231333302232;i=1;p=1.000;q=13,8,5,4,10,7,12,25,4,25,6,15,6,27,6,11,12,7,14,10;u=0,0,0,1
-3_148_289_F3 solid read 4177672 4177691 8.0 - . g=T1203101332223323323;i=1;p=1.000;q=9,21,11,6,5,7,25,24,26,24,8,9,7,12,7,4,11,9,4,4;u=0,0,0,1
-3_437_1000_F3 solid read 4179623 4179642 12.3 + . g=A0112222212231131001;i=1;p=1.000;q=26,27,26,27,4,27,17,6,22,13,27,24,6,27,21,27,22,15,24,9;u=0,0,1
-3_318_2011_F3 solid read 4218181 4218200 12.9 - . g=T2133330223033303323;i=1;p=1.000;q=25,27,27,5,5,16,27,16,27,15,18,25,26,11,27,19,16,24,9,15;u=0,0,0,1
-3_14_11_F3 solid read 4222697 4222716 7.8 - . g=T2323310222232322122;i=1;p=1.000;q=6,23,16,25,25,9,7,4,12,4,14,6,10,7,6,9,18,4,10,4;u=0,0,0,1
-3_402_391_F3 solid read 4274545 4274564 6.2 - . g=C3303323321111111111;i=1;p=1.000;q=10,19,15,15,7,8,13,4,7,4,5,16,4,4,5,4,9,4,4,4;u=0,0,0,1
-3_293_504_F3 solid read 4339235 4339254 9.5 + . g=C2133223303331120213;i=1;p=1.000;q=6,4,5,26,13,7,17,6,24,10,27,24,5,9,21,9,23,24,20,14;u=0,0,0,1
-3_360_914_F3 solid read 4407004 4407023 10.7 + . g=T3012102130232022001;i=1;p=1.000;q=23,24,19,17,24,6,26,17,25,15,7,24,14,11,26,9,22,4,8,5;u=0,0,0,1
-3_118_1532_F3 solid read 4431702 4431721 10.2 + . g=C3233220201223200322;i=1;p=1.000;q=20,9,17,22,17,23,13,4,9,5,16,11,10,6,17,7,9,22,27,27;u=0,0,1
-3_358_133_F3 solid read 4460191 4460210 9.1 + . g=T0221223112322112233;i=1;p=1.000;q=6,23,12,22,7,6,7,4,13,5,9,23,12,9,24,8,14,7,20,26;u=0,0,0,1
-3_397_195_F3 solid read 4499390 4499409 6.9 - . g=T3302332313332212121;i=1;p=1.000;q=23,14,15,5,9,8,6,4,4,13,4,16,13,16,4,7,4,12,4,5;u=0,0,0,1
-3_158_642_F3 solid read 4533144 4533163 7.1 - . g=A1332103332323233212;i=1;p=1.000;q=8,20,9,22,8,14,4,16,17,4,8,13,7,8,4,12,5,4,4,4;u=0,0,0,1
-3_300_1439_F3 solid read 4580452 4580471 12.3 - . g=A0331111211302100201;i=1;p=1.000;q=5,17,21,14,4,16,11,27,21,9,17,17,27,23,12,21,16,27,25,25;u=0,0,0,1
-# Elapsed time 0.846 secs
=====================================
Tests/GFF/c_elegans_WS199_ann_gff.txt deleted
=====================================
@@ -1,2 +0,0 @@
-# modified GFF file to remove location coordinates and test annotations
-I Expr_profile experimental_result_region . . . + . expr_profile=B0019.1
=====================================
Tests/GFF/c_elegans_WS199_dna_shortened.fa deleted
=====================================
@@ -1,21 +0,0 @@
->I
-gcctaagcctaagcctaagcctaagcctaagcctaagcctaagcctaagcctaagcctaa
-gcctaagcctaagcctaagcctaagcctaagcctaagcct
->II
-cctaagcctaagcctaagcctaagcctaagcctaagcctaagcctaagcctaagcctaag
-cctaagcctaagcctaagcctaagcctaagcctaagccta
->III
-cctaagcctaagcctaagcctaagcctaagcctaagcctaagcctaagcctaagcctaag
-cctaagcctaagcctaagcctaagcctaagcctaagccta
->IV
-cctaagcctaagcctaagcctaagcctaagcctaagcctaagcctaagcctaagcctaag
-cctaagcctaagcctaagcctaagcctaagcctaagccta
->V
-gaattcctaagcctaagcctaagcctaagcctaagcctaagcctaagcctaagcctaagc
-ctaagcctaagcctaagcctaagcctaagcctaagcctaa
->X
-ctaagcctaagcctaagcctaagcctaagcctaagcctaagcctaagcctaagcctaagc
-ctaagcctaagcctaagcctaagcctaagcctaagcctaa
->MtDNA
-cagtaaatagtttaataaaaatatagcatttgggttgctaagatattattactgatagaa
-tttttagtttaatttagaatgtatcacttacaatgatggg
=====================================
Tests/GFF/c_elegans_WS199_shortened_gff.txt deleted
=====================================
@@ -1,177 +0,0 @@
-I Orfeome PCR_product 12759747 12764936 . - . amplified=1;pcr_product=mv_B0019.1
-I SAGE_tag_unambiguously_mapped SAGE_tag 12763533 12763553 . - . count=1;gene=amx-2;sequence=SAGE:ggcagagtcttttggca;transcript=B0019.1
-I SAGE_tag_unambiguously_mapped SAGE_tag 12761492 12761512 . - . count=5;gene=amx-2;sequence=SAGE:aacggagccgtacacgc;transcript=B0019.1
-I SAGE_tag_most_three_prime SAGE_tag 12761499 12761512 . - . count=9;gene=amx-2;sequence=SAGE:aacggagccg;transcript=B0019.1
-X SAGE_tag SAGE_tag 6819353 6819366 . + . count=9;gene=amx-2;sequence=SAGE:aacggagccg;transcript=B0019.1
-I Expr_profile experimental_result_region 12762449 12764118 . + . expr_profile=B0019.1
-I Coding_transcript CDS 12759745 12759828 . - 0 ID=CDS:B0019.1;Parent=Transcript:B0019.1;locus=amx-2;status=Partially_confirmed;wormpep=CE:CE40797
-I Coding_transcript CDS 12759949 12760013 . - 2 ID=CDS:B0019.1;Parent=Transcript:B0019.1;locus=amx-2;status=Partially_confirmed;wormpep=CE:CE40797
-I Coding_transcript CDS 12760227 12760319 . - 2 ID=CDS:B0019.1;Parent=Transcript:B0019.1;locus=amx-2;status=Partially_confirmed;wormpep=CE:CE40797
-I Coding_transcript CDS 12760365 12760494 . - 0 ID=CDS:B0019.1;Parent=Transcript:B0019.1;locus=amx-2;status=Partially_confirmed;wormpep=CE:CE40797
-I Coding_transcript CDS 12760834 12760904 . - 2 ID=CDS:B0019.1;Parent=Transcript:B0019.1;locus=amx-2;status=Partially_confirmed;wormpep=CE:CE40797
-I Coding_transcript CDS 12761172 12761516 . - 2 ID=CDS:B0019.1;Parent=Transcript:B0019.1;locus=amx-2;status=Partially_confirmed;wormpep=CE:CE40797
-I Coding_transcript CDS 12761799 12761953 . - 1 ID=CDS:B0019.1;Parent=Transcript:B0019.1;locus=amx-2;status=Partially_confirmed;wormpep=CE:CE40797
-I Coding_transcript CDS 12762127 12762268 . - 2 ID=CDS:B0019.1;Parent=Transcript:B0019.1;locus=amx-2;status=Partially_confirmed;wormpep=CE:CE40797
-I Coding_transcript CDS 12762648 12762806 . - 2 ID=CDS:B0019.1;Parent=Transcript:B0019.1;locus=amx-2;status=Partially_confirmed;wormpep=CE:CE40797
-I Coding_transcript CDS 12763112 12763249 . - 2 ID=CDS:B0019.1;Parent=Transcript:B0019.1;locus=amx-2;status=Partially_confirmed;wormpep=CE:CE40797
-I Coding_transcript CDS 12763448 12763655 . - 0 ID=CDS:B0019.1;Parent=Transcript:B0019.1;locus=amx-2;status=Partially_confirmed;wormpep=CE:CE40797
-I Coding_transcript CDS 12763729 12763882 . - 1 ID=CDS:B0019.1;Parent=Transcript:B0019.1;locus=amx-2;status=Partially_confirmed;wormpep=CE:CE40797
-I Coding_transcript CDS 12763979 12764102 . - 2 ID=CDS:B0019.1;Parent=Transcript:B0019.1;locus=amx-2;status=Partially_confirmed;wormpep=CE:CE40797
-I Coding_transcript CDS 12764291 12764471 . - 0 ID=CDS:B0019.1;Parent=Transcript:B0019.1;locus=amx-2;status=Partially_confirmed;wormpep=CE:CE40797
-I Coding_transcript CDS 12764812 12764937 . - 0 ID=CDS:B0019.1;Parent=Transcript:B0019.1;locus=amx-2;status=Partially_confirmed;wormpep=CE:CE40797
-I history CDS 12759745 12759828 . - 0 ID=CDS:B0019.1:wp173
-I history CDS 12759949 12760013 . - 2 ID=CDS:B0019.1:wp173
-I history CDS 12760227 12760319 . - 2 ID=CDS:B0019.1:wp173
-I history CDS 12760365 12760494 . - 0 ID=CDS:B0019.1:wp173
-I history CDS 12760834 12760904 . - 2 ID=CDS:B0019.1:wp173
-I history CDS 12761172 12761516 . - 2 ID=CDS:B0019.1:wp173
-I history CDS 12761577 12761626 . - 1 ID=CDS:B0019.1:wp173
-I history CDS 12761795 12761953 . - 1 ID=CDS:B0019.1:wp173
-I history CDS 12762127 12762268 . - 2 ID=CDS:B0019.1:wp173
-I history CDS 12762648 12762806 . - 2 ID=CDS:B0019.1:wp173
-I history CDS 12763112 12763249 . - 2 ID=CDS:B0019.1:wp173
-I history CDS 12763448 12763655 . - 0 ID=CDS:B0019.1:wp173
-I history CDS 12763729 12763882 . - 1 ID=CDS:B0019.1:wp173
-I history CDS 12763979 12764102 . - 2 ID=CDS:B0019.1:wp173
-I history CDS 12764291 12764471 . - 0 ID=CDS:B0019.1:wp173
-I history CDS 12764812 12764937 . - 0 ID=CDS:B0019.1:wp173
-I history CDS 12759745 12759828 . - 0 ID=CDS:B0019.1:wp90
-I history CDS 12759949 12760013 . - 2 ID=CDS:B0019.1:wp90
-I history CDS 12760227 12760319 . - 2 ID=CDS:B0019.1:wp90
-I history CDS 12761172 12761516 . - 2 ID=CDS:B0019.1:wp90
-I history CDS 12761577 12761626 . - 1 ID=CDS:B0019.1:wp90
-I history CDS 12761795 12761953 . - 1 ID=CDS:B0019.1:wp90
-I history CDS 12762127 12762268 . - 2 ID=CDS:B0019.1:wp90
-I history CDS 12762648 12762806 . - 2 ID=CDS:B0019.1:wp90
-I history CDS 12763112 12763249 . - 2 ID=CDS:B0019.1:wp90
-I history CDS 12763469 12763655 . - 0 ID=CDS:B0019.1:wp90
-I history CDS 12763729 12763882 . - 1 ID=CDS:B0019.1:wp90
-I history CDS 12763979 12764102 . - 2 ID=CDS:B0019.1:wp90
-I history CDS 12764291 12764471 . - 0 ID=CDS:B0019.1:wp90
-I history CDS 12764812 12764937 . - 0 ID=CDS:B0019.1:wp90
-I mass_spec_genome translated_nucleotide_match 12761920 12761953 . - . ID=Target:381130;Target=Mass_spec_peptide:MSP:FADFSPLDVSDVNFATDDLAK 10 21 +;Note=MSP:FADFSPLDVSDVNFATDDLAK;cds_matches=B0019.1;protein_matches=WP:CE40797;times_observed=3
-I mass_spec_genome translated_nucleotide_match 12762127 12762155 . - . ID=Target:381130;Target=Mass_spec_peptide:MSP:FADFSPLDVSDVNFATDDLAK 1 10 +;Note=MSP:FADFSPLDVSDVNFATDDLAK;cds_matches=B0019.1;protein_matches=WP:CE40797;times_observed=3
-I mass_spec_genome translated_nucleotide_match 12763506 12763559 . - . ID=Target:381133;Target=Mass_spec_peptide:MSP:FGHGQSLLAQGGMNEVVR 1 18 +;Note=MSP:FGHGQSLLAQGGMNEVVR;cds_matches=B0019.1;protein_matches=WP:CE40797;times_observed=1
-I mass_spec_genome translated_nucleotide_match 12764361 12764411 . - . ID=Target:381144;Target=Mass_spec_peptide:MSP:NIQQNRPGLSVLVLEAR 1 17 +;Note=MSP:NIQQNRPGLSVLVLEAR;cds_matches=B0019.1;protein_matches=WP:CE40797;times_observed=2
-I Coding_transcript mRNA 12759582 12764949 . - . ID=Transcript:B0019.1;Note=amx-2;Parent=Gene:WBGene00000138;cds=B0019.1;prediction_status=Partially_confirmed;wormpep=CE:CE40797
-I Allele SNP 12764272 12764272 . + . interpolated_map_position=14.003;rflp=No;variation=snp_B0019[1]
-I Oligo_set reagent 12759745 12761589 . - . oligo_set=Aff_B0019.1
-I Coding_transcript exon 12759745 12759828 . - 0 Parent=Transcript:B0019.1
-I Coding_transcript exon 12759949 12760013 . - 2 Parent=Transcript:B0019.1
-I Coding_transcript exon 12760227 12760319 . - 2 Parent=Transcript:B0019.1
-I Coding_transcript exon 12760365 12760494 . - 0 Parent=Transcript:B0019.1
-I Coding_transcript exon 12760834 12760904 . - 2 Parent=Transcript:B0019.1
-I Coding_transcript exon 12761172 12761516 . - 2 Parent=Transcript:B0019.1
-I Coding_transcript exon 12761799 12761953 . - 1 Parent=Transcript:B0019.1
-I Coding_transcript exon 12762127 12762268 . - 2 Parent=Transcript:B0019.1
-I Coding_transcript exon 12762648 12762806 . - 2 Parent=Transcript:B0019.1
-I Coding_transcript exon 12763112 12763249 . - 2 Parent=Transcript:B0019.1
-I Coding_transcript exon 12763448 12763655 . - 0 Parent=Transcript:B0019.1
-I Coding_transcript exon 12763729 12763882 . - 1 Parent=Transcript:B0019.1
-I Coding_transcript exon 12763979 12764102 . - 2 Parent=Transcript:B0019.1
-I Coding_transcript exon 12764291 12764471 . - 0 Parent=Transcript:B0019.1
-I Coding_transcript exon 12764812 12764937 . - 0 Parent=Transcript:B0019.1
-I Coding_transcript five_prime_UTR 12764938 12764949 . - . Parent=Transcript:B0019.1
-I Coding_transcript three_prime_UTR 12759582 12759744 . - . Parent=Transcript:B0019.1
-I Coding_transcript intron 12760495 12760833 . - . Parent=Transcript:B0019.1;confirmed_est=EC027594
-I Coding_transcript intron 12760905 12761171 . - . Parent=Transcript:B0019.1;confirmed_est=EC027594
-I Coding_transcript intron 12761517 12761798 . - . Parent=Transcript:B0019.1;confirmed_est=EC027594
-I Coding_transcript intron 12759829 12759948 . - . Parent=Transcript:B0019.1;confirmed_est=EC034652
-I Coding_transcript intron 12760014 12760226 . - . Parent=Transcript:B0019.1;confirmed_est=EC034652
-I Coding_transcript intron 12760320 12760364 . - . Parent=Transcript:B0019.1;confirmed_est=yk1054h04.3
-I Coding_transcript intron 12763883 12763978 . - . Parent=Transcript:B0019.1;confirmed_est=yk1054h04.5,OSTF088D9_1
-I Coding_transcript intron 12764103 12764290 . - . Parent=Transcript:B0019.1;confirmed_est=yk1054h04.5,OSTF088D9_1
-I Coding_transcript intron 12764472 12764811 . - . Parent=Transcript:B0019.1;confirmed_est=yk1054h04.5,OSTF088D9_1
-I Coding_transcript intron 12762807 12763111 . - . Parent=Transcript:B0019.1;confirmed_est=yk1056c07.5
-I Coding_transcript intron 12763250 12763447 . - . Parent=Transcript:B0019.1;confirmed_est=yk1056c07.5
-I Coding_transcript intron 12763656 12763728 . - . Parent=Transcript:B0019.1;confirmed_est=yk1056c07.5
-I Coding_transcript intron 12761954 12762126 . - . Parent=Transcript:B0019.1;confirmed_est=yk262g9.5
-I Coding_transcript intron 12762269 12762647 . - . Parent=Transcript:B0019.1;confirmed_est=yk262g9.5
-I Promoterome PCR_product 12764938 12766937 . + . pcr_product=p_B0019.1_93
-I GenePair_STS PCR_product 12762449 12764118 . + . pcr_product=sjj_B0019.1
-I Coding_transcript gene 12759582 12764949 . - . ID=Gene:WBGene00000138
-III Orfeome PCR_product 13780230 13780850 . + . amplified=1;pcr_product=mv_3R5.1.v6
-IV Orfeome PCR_product 17486939 17488952 . - . amplified=1;pcr_product=mv_4R79.1
-IV Orfeome PCR_product 17480353 17483284 . - . amplified=1;pcr_product=mv_4R79.2
-X Orfeome PCR_product 17714881 17718531 . + . amplified=1;pcr_product=mv_6R55.1
-X Orfeome PCR_product 17712787 17714742 . + . amplified=1;pcr_product=mv_6R55.2
-II Orfeome PCR_product 6995874 7010146 . + . amplified=1;pcr_product=mv_AAA03517
-III Orfeome PCR_product 5625097 5631795 . + . amplified=1;pcr_product=mv_AAA03544
-X GenePair_STS PCR_product 9962853 9963737 . + . pcr_product=cenix:102-c3
-II GenePair_STS PCR_product 5507236 5508135 . + . pcr_product=cenix:102-c4
-V GenePair_STS PCR_product 10117842 10118735 . + . pcr_product=cenix:102-c5
-IV GenePair_STS PCR_product 3566130 3567025 . + . pcr_product=cenix:102-c6
-X GenePair_STS PCR_product 6117180 6117930 . + . pcr_product=cenix:102-c7
-IV GenePair_STS PCR_product 7189492 7190369 . + . pcr_product=cenix:102-c9
-II GenePair_STS PCR_product 14462527 14463202 . + . pcr_product=cenix:102-d1
-X Promoterome PCR_product 2258069 2259336 . + . pcr_product=p_AH9.2_93
-IV Promoterome PCR_product 12157449 12159448 . + . pcr_product=p_B0001.6_93
-I Promoterome PCR_product 12764938 12766937 . + . pcr_product=p_B0019.1_93
-V Promoterome PCR_product 10320122 10320689 . + . pcr_product=p_B0024.12_93
-I Coding_transcript CDS 4581214 4581237 . - 0 ID=CDS:D1007.5b;Parent=Transcript:D1007.5b.2,Transcript:D1007.5b.1;status=Confirmed;wormpep=WP:CE33577
-I Coding_transcript CDS 4581664 4582026 . - 0 ID=CDS:D1007.5b;Parent=Transcript:D1007.5b.2,Transcript:D1007.5b.1;status=Confirmed;wormpep=WP:CE33577
-I Coding_transcript CDS 4582412 4582718 . - 1 ID=CDS:D1007.5b;Parent=Transcript:D1007.5b.2,Transcript:D1007.5b.1;status=Confirmed;wormpep=WP:CE33577
-I Coding_transcript CDS 4583190 4583374 . - 0 ID=CDS:D1007.5b;Parent=Transcript:D1007.5b.2,Transcript:D1007.5b.1;status=Confirmed;wormpep=WP:CE33577
-I Coding_transcript CDS 4583426 4583509 . - 0 ID=CDS:D1007.5b;Parent=Transcript:D1007.5b.2,Transcript:D1007.5b.1;status=Confirmed;wormpep=WP:CE33577
-I Coding_transcript CDS 4583560 4583805 . - 0 ID=CDS:D1007.5b;Parent=Transcript:D1007.5b.2,Transcript:D1007.5b.1;status=Confirmed;wormpep=WP:CE33577
-I Coding_transcript mRNA 4580734 4583815 . - . ID=Transcript:D1007.5b.1;Parent=Gene:WBGene00017003;cds=D1007.5b;prediction_status=Confirmed;wormpep=WP:CE33577
-I Coding_transcript mRNA 4581214 4583811 . - . ID=Transcript:D1007.5b.2;Parent=Gene:WBGene00017003;cds=D1007.5b;prediction_status=Confirmed;wormpep=WP:CE33577
-I Coding_transcript exon 4581214 4581237 . - 0 Parent=Transcript:D1007.5b.1
-I Coding_transcript exon 4581664 4582026 . - 0 Parent=Transcript:D1007.5b.1
-I Coding_transcript exon 4582412 4582718 . - 1 Parent=Transcript:D1007.5b.1
-I Coding_transcript exon 4583190 4583374 . - 0 Parent=Transcript:D1007.5b.1
-I Coding_transcript exon 4583426 4583509 . - 0 Parent=Transcript:D1007.5b.1
-I Coding_transcript exon 4583560 4583805 . - 0 Parent=Transcript:D1007.5b.1
-I Coding_transcript five_prime_UTR 4583806 4583815 . - . Parent=Transcript:D1007.5b.1
-I Coding_transcript three_prime_UTR 4580734 4581213 . - . Parent=Transcript:D1007.5b.1
-I Coding_transcript intron 4582027 4582411 . - . Parent=Transcript:D1007.5b.1;confirmed_est=EB994038
-I Coding_transcript intron 4583375 4583425 . - . Parent=Transcript:D1007.5b.1;confirmed_est=EC038345,OSTF085G5_1
-I Coding_transcript intron 4583510 4583559 . - . Parent=Transcript:D1007.5b.1;confirmed_est=EC038345,OSTF085G5_1
-I Coding_transcript intron 4582719 4583189 . - . Parent=Transcript:D1007.5b.1;confirmed_est=yk1055g06.5,OSTF085G5_1
-I Coding_transcript intron 4581238 4581663 . - . Parent=Transcript:D1007.5b.1;confirmed_est=yk1057e08.3
-I Coding_transcript exon 4581214 4581237 . - 0 Parent=Transcript:D1007.5b.2
-I Coding_transcript exon 4581664 4582026 . - 0 Parent=Transcript:D1007.5b.2
-I Coding_transcript exon 4582412 4582718 . - 1 Parent=Transcript:D1007.5b.2
-I Coding_transcript exon 4583190 4583374 . - 0 Parent=Transcript:D1007.5b.2
-I Coding_transcript exon 4583426 4583509 . - 0 Parent=Transcript:D1007.5b.2
-I Coding_transcript exon 4583560 4583805 . - 0 Parent=Transcript:D1007.5b.2
-I Coding_transcript five_prime_UTR 4583806 4583811 . - . Parent=Transcript:D1007.5b.2
-I Coding_transcript intron 4582027 4582411 . - . Parent=Transcript:D1007.5b.2;confirmed_est=EB994038
-I Coding_transcript intron 4583375 4583425 . - . Parent=Transcript:D1007.5b.2;confirmed_est=EC038345,OSTF085G5_1
-I Coding_transcript intron 4583510 4583559 . - . Parent=Transcript:D1007.5b.2;confirmed_est=EC038345,OSTF085G5_1
-I Coding_transcript intron 4582719 4583189 . - . Parent=Transcript:D1007.5b.2;confirmed_est=yk1055g06.5,OSTF085G5_1
-I Coding_transcript intron 4581238 4581663 . - . Parent=Transcript:D1007.5b.2;confirmed_est=yk1057e08.3
-I Coding_transcript gene 4580693 4583815 . - . ID=Gene:WBGene00017003
-I SAGE_tag_unambiguously_mapped SAGE_tag 4581093 4581113 . - . count=10;gene=D1007.5;sequence=SAGE:tttgcgaattacttgct;transcript=D1007.5b.1,D1007.5a
-I SAGE_tag_unambiguously_mapped SAGE_tag 4580748 4580768 . - . count=112;gene=D1007.5;sequence=SAGE:ttttccattaattttga;transcript=D1007.5b.1,D1007.5a
-I SAGE_tag_unambiguously_mapped SAGE_tag 4582415 4582428 . - . count=1;gene=D1007.5;sequence=SAGE:cattttcgtg;transcript=D1007.5b.2,D1007.5b.1,D1007.5a
-I SAGE_tag_unambiguously_mapped SAGE_tag 4580914 4580927 . - . count=1;gene=D1007.5;sequence=SAGE:taaatttcaa;transcript=D1007.5b.1,D1007.5a
-I SAGE_tag_unambiguously_mapped SAGE_tag 4581193 4581206 . - . count=1;gene=D1007.5;sequence=SAGE:tgctcgttcg;transcript=D1007.5b.1,D1007.5a
-I SAGE_tag_unambiguously_mapped SAGE_tag 4583465 4583478 . - . count=1;gene=D1007.5;sequence=SAGE:tgttggcctt;transcript=D1007.5b.2,D1007.5b.1,D1007.5a
-I SAGE_tag_unambiguously_mapped SAGE_tag 4583458 4583478 . - . count=1;gene=D1007.5;sequence=SAGE:tgttggccttttacttg;transcript=D1007.5b.2,D1007.5b.1,D1007.5a
-I SAGE_tag_unambiguously_mapped SAGE_tag 4582533 4582553 . - . count=2;gene=D1007.5;sequence=SAGE:tgcagtgatagtccagc;transcript=D1007.5b.2,D1007.5b.1,D1007.5a
-I SAGE_tag_unambiguously_mapped SAGE_tag 4581100 4581113 . - . count=2;gene=D1007.5;sequence=SAGE:tttgcgaatt;transcript=D1007.5b.1,D1007.5a
-I SAGE_tag_unambiguously_mapped SAGE_tag 4580755 4580768 . - . count=43;gene=D1007.5;sequence=SAGE:ttttccatta;transcript=D1007.5b.1,D1007.5a
-I Coding_transcript CDS 4580993 4581241 . - 0 ID=CDS:D1007.5a;Parent=Transcript:D1007.5a;status=Confirmed;wormpep=CE:CE29034
-I Coding_transcript CDS 4581664 4582026 . - 0 ID=CDS:D1007.5a;Parent=Transcript:D1007.5a;status=Confirmed;wormpep=CE:CE29034
-I Coding_transcript CDS 4582412 4582718 . - 1 ID=CDS:D1007.5a;Parent=Transcript:D1007.5a;status=Confirmed;wormpep=CE:CE29034
-I Coding_transcript CDS 4583190 4583374 . - 0 ID=CDS:D1007.5a;Parent=Transcript:D1007.5a;status=Confirmed;wormpep=CE:CE29034
-I Coding_transcript CDS 4583426 4583509 . - 0 ID=CDS:D1007.5a;Parent=Transcript:D1007.5a;status=Confirmed;wormpep=CE:CE29034
-I Coding_transcript CDS 4583560 4583805 . - 0 ID=CDS:D1007.5a;Parent=Transcript:D1007.5a;status=Confirmed;wormpep=CE:CE29034
-I mass_spec_genome translated_nucleotide_match 4580996 4581052 . - . ID=Target:277116;Target=Mass_spec_peptide:MSP:IYEPSQEDLLLMHQLQQER 1 19 +;Note=MSP:IYEPSQEDLLLMHQLQQER;cds_matches=D1007.5a;protein_matches=WP:CE29034;times_observed=1
-I mass_spec_genome translated_nucleotide_match 4581838 4581882 . - . ID=Target:277138;Target=Mass_spec_peptide:MSP:AAIHLGSWHQIEGPR 1 15 +;Note=MSP:AAIHLGSWHQIEGPR;cds_matches=D1007.5b D1007.5a;protein_matches=WP:CE33577 WP:CE29034;times_observed=1
-I mass_spec_genome translated_nucleotide_match 4583581 4583601 . - . ID=Target:277176;Target=Mass_spec_peptide:MSP:TLWWLPK 1 7 +;Note=MSP:TLWWLPK;cds_matches=D1007.5b D1007.5a;protein_matches=WP:CE33577 WP:CE29034;times_observed=1
-I Coding_transcript mRNA 4580693 4583811 . - . ID=Transcript:D1007.5a;Parent=Gene:WBGene00017003;cds=D1007.5a;prediction_status=Confirmed;wormpep=CE:CE29034
-I Coding_transcript exon 4580993 4581241 . - 0 Parent=Transcript:D1007.5a
-I Coding_transcript exon 4581664 4582026 . - 0 Parent=Transcript:D1007.5a
-I Coding_transcript exon 4582412 4582718 . - 1 Parent=Transcript:D1007.5a
-I Coding_transcript exon 4583190 4583374 . - 0 Parent=Transcript:D1007.5a
-I Coding_transcript exon 4583426 4583509 . - 0 Parent=Transcript:D1007.5a
-I Coding_transcript exon 4583560 4583805 . - 0 Parent=Transcript:D1007.5a
-I Coding_transcript five_prime_UTR 4583806 4583811 . - . Parent=Transcript:D1007.5a
-I Coding_transcript three_prime_UTR 4580693 4580992 . - . Parent=Transcript:D1007.5a
-I Coding_transcript intron 4582027 4582411 . - . Parent=Transcript:D1007.5a;confirmed_est=EB994038
-I Coding_transcript intron 4581242 4581663 . - . Parent=Transcript:D1007.5a;confirmed_est=EB994038,OSTR085G5_1
-I Coding_transcript intron 4583375 4583425 . - . Parent=Transcript:D1007.5a;confirmed_est=EC038345,OSTF085G5_1
-I Coding_transcript intron 4583510 4583559 . - . Parent=Transcript:D1007.5a;confirmed_est=EC038345,OSTF085G5_1
-I Coding_transcript intron 4582719 4583189 . - . Parent=Transcript:D1007.5a;confirmed_est=yk1055g06.5,OSTF085G5_1
=====================================
Tests/GFF/ensembl_gtf.txt deleted
=====================================
@@ -1,33 +0,0 @@
-I snoRNA exon 3747 3909 . - . gene_id "Y74C9A.6"; transcript_id "Y74C9A.6"; exon_number "1"; gene_name "Y74C9A.6"; transcript_name "NR_001477.2";
-I protein_coding exon 12764812 12764949 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "1"; gene_name "amx-2"; transcript_name "B0019.1";
-I protein_coding CDS 12764812 12764937 . - 0 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "1"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1";
-I protein_coding start_codon 12764935 12764937 . - 0 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "1"; gene_name "amx-2"; transcript_name "B0019.1";
-I protein_coding exon 12764291 12764471 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "2"; gene_name "amx-2"; transcript_name "B0019.1";
-I protein_coding CDS 12764291 12764471 . - 0 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "2"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1";
-I protein_coding exon 12763979 12764102 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "3"; gene_name "amx-2"; transcript_name "B0019.1";
-I protein_coding CDS 12763979 12764102 . - 2 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "3"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1";
-I protein_coding exon 12763729 12763882 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "4"; gene_name "amx-2"; transcript_name "B0019.1";
-I protein_coding CDS 12763729 12763882 . - 1 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "4"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1";
-I protein_coding exon 12763448 12763655 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "5"; gene_name "amx-2"; transcript_name "B0019.1";
-I protein_coding CDS 12763448 12763655 . - 0 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "5"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1";
-I protein_coding exon 12763112 12763249 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "6"; gene_name "amx-2"; transcript_name "B0019.1";
-I protein_coding CDS 12763112 12763249 . - 2 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "6"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1";
-I protein_coding exon 12762648 12762806 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "7"; gene_name "amx-2"; transcript_name "B0019.1";
-I protein_coding CDS 12762648 12762806 . - 2 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "7"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1";
-I protein_coding exon 12762127 12762268 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "8"; gene_name "amx-2"; transcript_name "B0019.1";
-I protein_coding CDS 12762127 12762268 . - 2 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "8"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1";
-I protein_coding exon 12761799 12761953 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "9"; gene_name "amx-2"; transcript_name "B0019.1";
-I protein_coding CDS 12761799 12761953 . - 1 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "9"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1";
-I protein_coding exon 12761172 12761516 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "10"; gene_name "amx-2"; transcript_name "B0019.1";
-I protein_coding CDS 12761172 12761516 . - 2 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "10"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1";
-I protein_coding exon 12760834 12760904 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "11"; gene_name "amx-2"; transcript_name "B0019.1";
-I protein_coding CDS 12760834 12760904 . - 2 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "11"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1";
-I protein_coding exon 12760365 12760494 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "12"; gene_name "amx-2"; transcript_name "B0019.1";
-I protein_coding CDS 12760365 12760494 . - 0 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "12"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1";
-I protein_coding exon 12760227 12760319 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "13"; gene_name "amx-2"; transcript_name "B0019.1";
-I protein_coding CDS 12760227 12760319 . - 2 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "13"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1";
-I protein_coding exon 12759949 12760013 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "14"; gene_name "amx-2"; transcript_name "B0019.1";
-I protein_coding CDS 12759949 12760013 . - 2 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "14"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1";
-I protein_coding exon 12759579 12759828 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "15"; gene_name "amx-2"; transcript_name "B0019.1";
-I protein_coding CDS 12759748 12759828 . - 0 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "15"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1";
-I protein_coding stop_codon 12759745 12759747 . - 0 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "15"; gene_name "amx-2"; transcript_name "B0019.1";
=====================================
Tests/GFF/glimmer_nokeyval.gff3 deleted
=====================================
@@ -1,6 +0,0 @@
-##gff-version 3
-##sequence-region scaffold4215_3 1 6526
-scaffold4215_3 glimmer gene 3 62 . - . ID=GL0000006;Name=GL0000006;Lack 3'-end;
-scaffold4215_3 glimmer mRNA 3 62 . - . ID=GL0000006;Name=GL0000006;Parent=GL0000006;Lack 3'-end;
-scaffold4215_3 glimmer CDS 3 62 2.84 - 0 Parent=GL0000006;Lack 3'-end;
-scaffold4215_3 glimmer gene 124 1983 . - . ID=GL0000007;Name=GL0000007;Complete;
=====================================
Tests/GFF/hybrid1.gff3 deleted
=====================================
@@ -1,17 +0,0 @@
-##gff-version 3
-##sequence-region foo 1 100
-##feature-ontology bar
-##attribute-ontology baz
-##source-ontology boo
-##sequence-region chr17 62467934 62469545
-chr17 UCSC mRNA 62467934 62469545 . - . ID=A00469;Dbxref=AFFX-U133:205840_x_at,Locuslink:2688,Genbank-mRNA:A00469,Swissprot:P01241,PFAM:PF00103,AFFX-U95:1332_f_at,Swissprot:SOMA_HUMAN;Note=growth%20hormone%201;Alias=GH1
-chr17 UCSC CDS 62468039 62468236 . - 1 Parent=A00469
-chr17 UCSC CDS 62468490 62468654 . - 2 Parent=A00469
-chr17 UCSC CDS 62468747 62468866 . - 1 Parent=A00469
-chr17 UCSC CDS 62469076 62469236 . - 1 Parent=A00469
-chr17 UCSC CDS 62469497 62469506 . - 0 Parent=A00469
-###
-##FASTA
->chr17
-GATTACA
-GATTACA
=====================================
Tests/GFF/hybrid2.fa deleted
=====================================
@@ -1,3 +0,0 @@
->lcl|chr17
-GATTACA
-GATTACA
=====================================
Tests/GFF/hybrid2.gff3 deleted
=====================================
@@ -1,17 +0,0 @@
-##gff-version 3
-##sequence-region foo 1 100
-##feature-ontology bar
-##attribute-ontology baz
-##source-ontology boo
-##sequence-region chr17 62467934 62469545
-chr17 UCSC mRNA 62467934 62469545 . - . ID=A00469;Dbxref=AFFX-U133:205840_x_at,Locuslink:2688,Genbank-mRNA:A00469,Swissprot:P01241,PFAM:PF00103,AFFX-U95:1332_f_at,Swissprot:SOMA_HUMAN;Note=growth%20hormone%201;Alias=GH1
-chr17 UCSC CDS 62468039 62468236 . - 1 Parent=A00469
-chr17 UCSC CDS 62468490 62468654 . - 2 Parent=A00469
-chr17 UCSC CDS 62468747 62468866 . - 1 Parent=A00469
-chr17 UCSC CDS 62469076 62469236 . - 1 Parent=A00469
-chr17 UCSC CDS 62469497 62469506 . - 0 Parent=A00469
-###
-##FASTA
->lcl|chr17
-GATTACA
-GATTACA
=====================================
Tests/GFF/jgi_gff2.txt deleted
=====================================
@@ -1,6 +0,0 @@
-chr_1 JGI exon 37061 37174 . - . name "fgenesh1_pg.C_chr_1000007"; transcriptId 873
-chr_1 JGI CDS 37061 37174 . - 0 name "fgenesh1_pg.C_chr_1000007"; proteinId 873; exonNumber 3
-chr_1 JGI exon 37315 37620 . - . name "fgenesh1_pg.C_chr_1000007"; transcriptId 873
-chr_1 JGI CDS 37315 37620 . - 0 name "fgenesh1_pg.C_chr_1000007"; proteinId 873; exonNumber 2
-chr_1 JGI exon 37752 38216 . - . name "fgenesh1_pg.C_chr_1000007"; transcriptId 873
-chr_1 JGI CDS 37752 38216 . - 0 name "fgenesh1_pg.C_chr_1000007"; proteinId 873; exonNumber 1
=====================================
Tests/GFF/mouse_extra_comma.gff3 deleted
=====================================
@@ -1,17 +0,0 @@
-chr17 RefSeq gene 6797760 6818159 . + . ID=NC_000083.5:LOC100040603;Name=NC_000083.5:LOC100040603
-chr17 RefSeq mRNA 6797760 6818159 . + . ID=XM_001475631.1;Parent=NC_000083.5:LOC100040603
-chr17 RefSeq protein 6806527 6812289 . + . ID=;Parent=XM_001475631.1
-chr17 RefSeq five_prime_UTR 6797760 6797769 . + . Parent=XM_001475631.1
-chr17 RefSeq five_prime_UTR 6806513 6806526 . + . Parent=XM_001475631.1
-chr17 RefSeq CDS 6806527 6806553 . + 0 Name=CDS:NC_000083.5:LOC100040603;Parent=XM_001475631.1,
-chr17 RefSeq CDS 6808204 6808245 . + 0 Name=CDS:NC_000083.5:LOC100040603;Parent=XM_001475631.1,
-chr17 RefSeq CDS 6811330 6811453 . + 0 Name=CDS:NC_000083.5:LOC100040603;Parent=XM_001475631.1,
-chr17 RefSeq CDS 6811792 6811869 . + 2 Name=CDS:NC_000083.5:LOC100040603;Parent=XM_001475631.1,
-chr17 RefSeq CDS 6812219 6812289 . + 2 Name=CDS:NC_000083.5:LOC100040603;Parent=XM_001475631.1,
-chr17 RefSeq three_prime_UTR 6812290 6818159 . + . Parent=XM_001475631.1
-chr17 RefSeq exon 6797760 6797769 . + . Parent=XM_001475631.1
-chr17 RefSeq exon 6806513 6806553 . + . Parent=XM_001475631.1
-chr17 RefSeq exon 6808204 6808245 . + . Parent=XM_001475631.1
-chr17 RefSeq exon 6811330 6811453 . + . Parent=XM_001475631.1
-chr17 RefSeq exon 6811792 6811869 . + . Parent=XM_001475631.1
-chr17 RefSeq exon 6812219 6818159 . + . Parent=XM_001475631.1
=====================================
Tests/GFF/ncbi_gff3.txt deleted
=====================================
@@ -1,21 +0,0 @@
-##gff-version 3
-##source-version NCBI C++ formatter 0.2
-##date 2009-04-25
-##Type DNA NC_008596.1
-NC_008596.1 RefSeq gene 12272 13301 . + . locus_tag=MSMEG_0013;note=ferric%20enterobactin%20transport%20system%20permease%20protein%20FepG%3B%20this%20gene%20contains%20a%20frame%20shift%20which%20is%20not%20the%20result%20of%20sequencing%20error%3B%20identified%20by%20match%20to%20protein%20family%20HMM%20PF01032;pseudo=;db_xref=GeneID:4537201
-NC_008596.1 RefSeq gene 1137579 1138550 . + . ID=NC_008596.1:speB;locus_tag=MSMEG_1072;db_xref=GeneID:4535378
-NC_008596.1 RefSeq CDS 1137579 1138547 . + 0 ID=NC_008596.1:speB:unknown_transcript_1;Parent=NC_008596.1:speB;locus_tag=MSMEG_1072;EC_number=3.5.3.11;note=identified%20by%20match%20to%20protein%20family%20HMM%20PF00491%3B%20match%20to%20protein%20family%20HMM%20TIGR01230;transl_table=11;product=agmatinase;protein_id=YP_885468.1;db_xref=GI:118469242;db_xref=GeneID:4535378;exon_number=1
-NC_008596.1 RefSeq start_codon 1137579 1137581 . + 0 ID=NC_008596.1:speB:unknown_transcript_1;Parent=NC_008596.1:speB;locus_tag=MSMEG_1072;EC_number=3.5.3.11;note=identified%20by%20match%20to%20protein%20family%20HMM%20PF00491%3B%20match%20to%20protein%20family%20HMM%20TIGR01230;transl_table=11;product=agmatinase;protein_id=YP_885468.1;db_xref=GI:118469242;db_xref=GeneID:4535378;exon_number=1
-NC_008596.1 RefSeq stop_codon 1138548 1138550 . + 0 ID=NC_008596.1:speB:unknown_transcript_1;Parent=NC_008596.1:speB;locus_tag=MSMEG_1072;EC_number=3.5.3.11;note=identified%20by%20match%20to%20protein%20family%20HMM%20PF00491%3B%20match%20to%20protein%20family%20HMM%20TIGR01230;transl_table=11;product=agmatinase;protein_id=YP_885468.1;db_xref=GI:118469242;db_xref=GeneID:4535378;exon_number=1
-NC_008596.1 RefSeq gene 3597069 3598112 . + . ID=NC_008596.1:speB;locus_tag=MSMEG_3535;db_xref=GeneID:4533678
-NC_008596.1 RefSeq CDS 3597069 3598109 . + 0 ID=NC_008596.1:speB:unknown_transcript_2;Parent=NC_008596.1:speB;locus_tag=MSMEG_3535;EC_number=3.5.3.11;note=identified%20by%20match%20to%20protein%20family%20HMM%20PF00491%3B%20match%20to%20protein%20family%20HMM%20TIGR01230;transl_table=11;product=agmatinase;protein_id=YP_887838.1;db_xref=GI:118470943;db_xref=GeneID:4533678;exon_number=1
-NC_008596.1 RefSeq start_codon 3597069 3597071 . + 0 ID=NC_008596.1:speB:unknown_transcript_2;Parent=NC_008596.1:speB;locus_tag=MSMEG_3535;EC_number=3.5.3.11;note=identified%20by%20match%20to%20protein%20family%20HMM%20PF00491%3B%20match%20to%20protein%20family%20HMM%20TIGR01230;transl_table=11;product=agmatinase;protein_id=YP_887838.1;db_xref=GI:118470943;db_xref=GeneID:4533678;exon_number=1
-NC_008596.1 RefSeq stop_codon 3598110 3598112 . + 0 ID=NC_008596.1:speB:unknown_transcript_2;Parent=NC_008596.1:speB;locus_tag=MSMEG_3535;EC_number=3.5.3.11;note=identified%20by%20match%20to%20protein%20family%20HMM%20PF00491%3B%20match%20to%20protein%20family%20HMM%20TIGR01230;transl_table=11;product=agmatinase;protein_id=YP_887838.1;db_xref=GI:118470943;db_xref=GeneID:4533678;exon_number=1
-NC_008596.1 RefSeq gene 4460713 4461672 . - . ID=NC_008596.1:speB;locus_tag=MSMEG_4374;db_xref=GeneID:4535424
-NC_008596.1 RefSeq CDS 4460716 4461672 . - 0 ID=NC_008596.1:speB:unknown_transcript_3;Parent=NC_008596.1:speB;locus_tag=MSMEG_4374;EC_number=3.5.3.11;note=identified%20by%20match%20to%20protein%20family%20HMM%20PF00491%3B%20match%20to%20protein%20family%20HMM%20TIGR01230;transl_table=11;product=agmatinase;protein_id=YP_888649.1;db_xref=GI:118469662;db_xref=GeneID:4535424;exon_number=1
-NC_008596.1 RefSeq start_codon 4461670 4461672 . - 0 ID=NC_008596.1:speB:unknown_transcript_3;Parent=NC_008596.1:speB;locus_tag=MSMEG_4374;EC_number=3.5.3.11;note=identified%20by%20match%20to%20protein%20family%20HMM%20PF00491%3B%20match%20to%20protein%20family%20HMM%20TIGR01230;transl_table=11;product=agmatinase;protein_id=YP_888649.1;db_xref=GI:118469662;db_xref=GeneID:4535424;exon_number=1
-NC_008596.1 RefSeq stop_codon 4460713 4460715 . - 0 ID=NC_008596.1:speB:unknown_transcript_3;Parent=NC_008596.1:speB;locus_tag=MSMEG_4374;EC_number=3.5.3.11;note=identified%20by%20match%20to%20protein%20family%20HMM%20PF00491%3B%20match%20to%20protein%20family%20HMM%20TIGR01230;transl_table=11;product=agmatinase;protein_id=YP_888649.1;db_xref=GI:118469662;db_xref=GeneID:4535424;exon_number=1
-NC_008596.1 RefSeq gene 4539385 4540344 . + . ID=NC_008596.1:speB;locus_tag=MSMEG_4459;db_xref=GeneID:4537057
-NC_008596.1 RefSeq CDS 4539385 4540341 . + 0 ID=NC_008596.1:speB:unknown_transcript_4;Parent=NC_008596.1:speB;locus_tag=MSMEG_4459;EC_number=3.5.3.11;note=identified%20by%20match%20to%20protein%20family%20HMM%20PF00491%3B%20match%20to%20protein%20family%20HMM%20TIGR01230;transl_table=11;product=agmatinase;protein_id=YP_888732.1;db_xref=GI:118472833;db_xref=GeneID:4537057;exon_number=1
-NC_008596.1 RefSeq start_codon 4539385 4539387 . + 0 ID=NC_008596.1:speB:unknown_transcript_4;Parent=NC_008596.1:speB;locus_tag=MSMEG_4459;EC_number=3.5.3.11;note=identified%20by%20match%20to%20protein%20family%20HMM%20PF00491%3B%20match%20to%20protein%20family%20HMM%20TIGR01230;transl_table=11;product=agmatinase;protein_id=YP_888732.1;db_xref=GI:118472833;db_xref=GeneID:4537057;exon_number=1
-NC_008596.1 RefSeq stop_codon 4540342 4540344 . + 0 ID=NC_008596.1:speB:unknown_transcript_4;Parent=NC_008596.1:speB;locus_tag=MSMEG_4459;EC_number=3.5.3.11;note=identified%20by%20match%20to%20protein%20family%20HMM%20PF00491%3B%20match%20to%20protein%20family%20HMM%20TIGR01230;transl_table=11;product=agmatinase;protein_id=YP_888732.1;db_xref=GI:118472833;db_xref=GeneID:4537057;exon_number=1
=====================================
Tests/GFF/problem_sequence_region.gff3 deleted
=====================================
@@ -1,7 +0,0 @@
-##gff-version 3
-#!gff-spec-version 1.21
-#!processor NCBI annotwriter
-##sequence-region 1 2482535
-##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=1282
-1 Local region 1 2482535 . + . ID=1:1..2482535;Dbxref=taxon:1282;Is_circular=true;Name=ANONYMOUS;gbkey=Src;genome=chromosome;mol_type=genomic DNA
-1 . gene 1 1356 . + . ID=gene-test_000001;Name=dnaA;gbkey=Gene;gene=dnaA;gene_biotype=protein_coding
=====================================
Tests/GFF/spaces.gff3 deleted
=====================================
@@ -1,5 +0,0 @@
-##gff-version 3
-contig1 . gene 1544 2057 . - . ID=contig1.1
-contig1 . mRNA 1544 2057 . - . ID=mRNA.contig1.1;Parent=contig1.1
-contig1 . mRNA 1544 2057 . - . foo=bar;ID=mRNA.contig1.1;Parent=contig1.1
-contig1 . mRNA 1544 2057 . - . ID=mRNA.contig1.1;Parent=contig1.1; foo=bar
=====================================
Tests/GFF/trans_splicing.gff3 deleted
=====================================
@@ -1,11 +0,0 @@
-1 manual gene 9559 9672 . + . ID=gene83;Name=rps12|lcl|NC_021456.1_cdsid_YP_008082803.1_8-gene;exception=trans-splicing
-1 manual gene 112442 113241 . + . ID=gene84;Name=rps12|lcl|NC_021456.1_cdsid_YP_008082803.1_8-gene;exception=trans-splicing
-1 manual mRNA 9559 9672 . + . ID=mRNA43;Parent=gene83,gene84;Name=rps12|lcl|NC_021456.1_cdsid_YP_008082803.1_8;exception=trans-splicing
-1 manual mRNA 112442 113241 . + . ID=mRNA43;Parent=gene83,gene84;Name=rps12|lcl|NC_021456.1_cdsid_YP_008082803.1_8;exception=trans-splicing
-1 manual exon 9559 9672 . + . Parent=mRNA43
-1 manual CDS 9559 9672 . + 0 Parent=mRNA43
-1 manual exon 112442 112673 . + . Parent=mRNA43
-1 manual CDS 112442 112673 . + 0 Parent=mRNA43
-1 manual intron 112674 113215 . + . Parent=mRNA43
-1 manual exon 113216 113241 . + . Parent=mRNA43
-1 manual CDS 113216 113241 . + 2 Parent=mRNA43
=====================================
Tests/GFF/transcripts.gff3 deleted
=====================================
@@ -1,18 +0,0 @@
-##gff-version 3
-##date 2013-11-13
-edit_test.fa . gene 500 2610 . + . ID=newGene
-edit_test.fa . mRNA 500 2385 . + . Parent=newGene;Namo=reinhard+did+this;Name=t1%28newGene%29;ID=t1;uri=http%3A//www.yahoo.com
-edit_test.fa . five_prime_UTR 500 802 . + . Parent=t1
-edit_test.fa . CDS 803 1012 . + . Parent=t1
-edit_test.fa . three_prime_UTR 1013 1168 . + . Parent=t1
-edit_test.fa . three_prime_UTR 1475 1654 . + . Parent=t1
-edit_test.fa . three_prime_UTR 1720 1908 . + . Parent=t1
-edit_test.fa . three_prime_UTR 2047 2385 . + . Parent=t1
-edit_test.fa . mRNA 1050 2610 . + . Parent=newGene;Name=t2%28newGene%29;ID=t2
-edit_test.fa . CDS 1050 1196 . + . Parent=t2
-edit_test.fa . CDS 1472 1651 . + . Parent=t2
-edit_test.fa . CDS 1732 2610 . + . Parent=t2
-edit_test.fa . mRNA 1050 2610 . + . Parent=newGene;Name=t3%28newGene%29;ID=t3
-edit_test.fa . CDS 1050 1196 . + . Parent=t3
-edit_test.fa . CDS 1472 1651 . + . Parent=t3
-edit_test.fa . CDS 1732 2610 . + . Parent=t3
=====================================
Tests/GFF/unescaped-semicolon.gff3 deleted
=====================================
@@ -1,2 +0,0 @@
-##gff-version 3
-chr1 . gene 1 100 . + . ID=PH01000020G1780;Description="osFTL6 FT-Like6 homologous to Flowering Locus T gene; contains Pfam profile PF01161: Phosphatidylethanolamine-binding protein, expressed"
\ No newline at end of file
=====================================
Tests/GFF/wormbase_gff2.txt deleted
=====================================
@@ -1,63 +0,0 @@
-I Genomic_canonical region 1 2679 . + . Sequence "cTel33B" ; Note "Clone cTel33B; Genbank AC199162" ; Note "Clone cTel33B; Genbank AC199162"
-I Coding_transcript Transcript 12759582 12764949 . - . Transcript "B0019.1" ; WormPep "WP:CE40797" ; Note "amx-2" ; Prediction_status "Partially_confirmed" ; Gene "WBGene00000138" ; CDS "B0019.1" ; WormPep "WP:CE40797" ; Note "amx-2" ; Prediction_status "Partially_confirmed" ; Gene "WBGene00000138"
-I Coding_transcript intron 12759829 12759948 . - . Transcript "B0019.1" ; Confirmed_EST EC034652
-I Coding_transcript intron 12760014 12760226 . - . Transcript "B0019.1" ; Confirmed_EST EC034652
-I Coding_transcript intron 12760320 12760364 . - . Transcript "B0019.1" ; Confirmed_EST yk1054h04.3
-I Coding_transcript intron 12760495 12760833 . - . Transcript "B0019.1" ; Confirmed_EST EC027594
-I Coding_transcript intron 12760905 12761171 . - . Transcript "B0019.1" ; Confirmed_EST EC027594
-I Coding_transcript intron 12761517 12761798 . - . Transcript "B0019.1" ; Confirmed_EST EC027594
-I Coding_transcript intron 12761954 12762126 . - . Transcript "B0019.1" ; Confirmed_EST yk262g9.5
-I Coding_transcript intron 12762269 12762647 . - . Transcript "B0019.1" ; Confirmed_EST yk262g9.5
-I Coding_transcript intron 12762807 12763111 . - . Transcript "B0019.1" ; Confirmed_EST yk1056c07.5
-I Coding_transcript intron 12763250 12763447 . - . Transcript "B0019.1" ; Confirmed_EST yk1056c07.5
-I Coding_transcript intron 12763656 12763728 . - . Transcript "B0019.1" ; Confirmed_EST yk1056c07.5
-I Coding_transcript intron 12763883 12763978 . - . Transcript "B0019.1" ; Confirmed_EST yk1054h04.5 ; Confirmed_EST OSTF088D9_1
-I Coding_transcript intron 12764103 12764290 . - . Transcript "B0019.1" ; Confirmed_EST yk1054h04.5 ; Confirmed_EST OSTF088D9_1
-I Coding_transcript intron 12764472 12764811 . - . Transcript "B0019.1" ; Confirmed_EST yk1054h04.5 ; Confirmed_EST OSTF088D9_1
-I Coding_transcript exon 12759582 12759828 . - . Transcript "B0019.1"
-I Coding_transcript exon 12759949 12760013 . - . Transcript "B0019.1"
-I Coding_transcript exon 12760227 12760319 . - . Transcript "B0019.1"
-I Coding_transcript exon 12760365 12760494 . - . Transcript "B0019.1"
-I Coding_transcript exon 12760834 12760904 . - . Transcript "B0019.1"
-I Coding_transcript exon 12761172 12761516 . - . Transcript "B0019.1"
-I Coding_transcript exon 12761799 12761953 . - . Transcript "B0019.1"
-I Coding_transcript exon 12762127 12762268 . - . Transcript "B0019.1"
-I Coding_transcript exon 12762648 12762806 . - . Transcript "B0019.1"
-I Coding_transcript exon 12763112 12763249 . - . Transcript "B0019.1"
-I Coding_transcript exon 12763448 12763655 . - . Transcript "B0019.1"
-I Coding_transcript exon 12763729 12763882 . - . Transcript "B0019.1"
-I Coding_transcript exon 12763979 12764102 . - . Transcript "B0019.1"
-I Coding_transcript exon 12764291 12764471 . - . Transcript "B0019.1"
-I Coding_transcript exon 12764812 12764949 . - . Transcript "B0019.1"
-I SAGE_tag_unambiguously_mapped SAGE_tag 12761492 12761512 . - . Sequence SAGE:aacggagccgtacacgc;count 5;Gene amx-2;Transcript B0019.1
-I SAGE_tag_most_three_prime SAGE_tag 12761499 12761512 . - . Sequence SAGE:aacggagccg;count 9;Gene amx-2;Transcript B0019.1
-I mass_spec_genome translated_nucleotide_match 12761920 12761953 . - . Target "Mass_spec_peptide:MSP:FADFSPLDVSDVNFATDDLAK" 10 21 ; Note "MSP:FADFSPLDVSDVNFATDDLAK" ; Protein_matches "WP:CE40797" ; CDS_matches "B0019.1" ; Times_observed "3"
-I mass_spec_genome translated_nucleotide_match 12762127 12762155 . - . Target "Mass_spec_peptide:MSP:FADFSPLDVSDVNFATDDLAK" 1 10 ; Note "MSP:FADFSPLDVSDVNFATDDLAK" ; Protein_matches "WP:CE40797" ; CDS_matches "B0019.1" ; Times_observed "3"
-I mass_spec_genome translated_nucleotide_match 12763506 12763559 . - . Target "Mass_spec_peptide:MSP:FGHGQSLLAQGGMNEVVR" 1 18 ; Note "MSP:FGHGQSLLAQGGMNEVVR" ; Protein_matches "WP:CE40797" ; CDS_matches "B0019.1" ; Times_observed "1"
-I SAGE_tag_unambiguously_mapped SAGE_tag 12763533 12763553 . - . Sequence SAGE:ggcagagtcttttggca;count 1;Gene amx-2;Transcript B0019.1
-I mass_spec_genome translated_nucleotide_match 12764361 12764411 . - . Target "Mass_spec_peptide:MSP:NIQQNRPGLSVLVLEAR" 1 17 ; Note "MSP:NIQQNRPGLSVLVLEAR" ; Protein_matches "WP:CE40797" ; CDS_matches "B0019.1" ; Times_observed "2"
-I GenePair_STS PCR_product 12762449 12764118 . + . PCR_product "sjj_B0019.1"
-I Expr_profile experimental_result_region 12762449 12764118 . + . Expr_profile "B0019.1"
-I Allele SNP 12764272 12764272 . + . Variation "snp_B0019[1]" ; Interpolated_map_position "14.003" ; ; RFLP "No"
-I Promoterome PCR_product 12764938 12766937 . + . PCR_product "p_B0019.1_93"
-I Oligo_set reagent 12759745 12761589 . - . Oligo_set "Aff_B0019.1"
-I Orfeome PCR_product 12759747 12764936 . - . PCR_product "mv_B0019.1" ; Amplified 1 ; Amplified 1
-I Coding_transcript three_prime_UTR 12759582 12759744 . - . Transcript "B0019.1"
-I Coding_transcript coding_exon 12759745 12759828 . - 0 Transcript "B0019.1" ; CDS "B0019.1"
-I Coding_transcript coding_exon 12759949 12760013 . - 2 Transcript "B0019.1" ; CDS "B0019.1"
-I Coding_transcript coding_exon 12760227 12760319 . - 2 Transcript "B0019.1" ; CDS "B0019.1"
-I Coding_transcript coding_exon 12760365 12760494 . - 0 Transcript "B0019.1" ; CDS "B0019.1"
-I Coding_transcript coding_exon 12760834 12760904 . - 2 Transcript "B0019.1" ; CDS "B0019.1"
-I Coding_transcript coding_exon 12761172 12761516 . - 2 Transcript "B0019.1" ; CDS "B0019.1"
-I Coding_transcript coding_exon 12761799 12761953 . - 1 Transcript "B0019.1" ; CDS "B0019.1"
-I Coding_transcript coding_exon 12762127 12762268 . - 2 Transcript "B0019.1" ; CDS "B0019.1"
-I Coding_transcript coding_exon 12762648 12762806 . - 2 Transcript "B0019.1" ; CDS "B0019.1"
-I Coding_transcript coding_exon 12763112 12763249 . - 2 Transcript "B0019.1" ; CDS "B0019.1"
-I Coding_transcript coding_exon 12763448 12763655 . - 0 Transcript "B0019.1" ; CDS "B0019.1"
-I Coding_transcript coding_exon 12763729 12763882 . - 1 Transcript "B0019.1" ; CDS "B0019.1"
-I Coding_transcript coding_exon 12763979 12764102 . - 2 Transcript "B0019.1" ; CDS "B0019.1"
-I Coding_transcript coding_exon 12764291 12764471 . - 0 Transcript "B0019.1" ; CDS "B0019.1"
-I Coding_transcript five_prime_UTR 12764938 12764949 . - . Transcript "B0019.1"
-I Coding_transcript coding_exon 12764812 12764937 . - 0 Transcript "B0019.1" ; CDS "B0019.1"
-X SAGE_tag SAGE_tag 6819353 6819366 . + . Sequence SAGE:aacggagccg;count 9;Gene amx-2;Transcript B0019.1
-X gene processed_transcript 944828 948883 . - . Gene "WBGene00004893"
=====================================
Tests/GFF/wormbase_gff2_alt.txt deleted
=====================================
@@ -1,9 +0,0 @@
-Remanei_genome Genomic_canonical region 1 7816 . + . Sequence "Contig1020";
-Contig102 WU_MERGED CDS 1629 3377 . - . CDS "cr01.sctg102.wum.2.1"
-Contig102 WU_MERGED coding_exon 2927 3377 . - . CDS "cr01.sctg102.wum.2.1"
-Contig102 WU_MERGED coding_exon 2474 2875 . - . CDS "cr01.sctg102.wum.2.1"
-Contig102 WU_MERGED coding_exon 1928 2430 . - . CDS "cr01.sctg102.wum.2.1"
-Contig102 WU_MERGED coding_exon 1629 1883 . - . CDS "cr01.sctg102.wum.2.1"
-Contig102 WU_MERGED intron 2876 2926 . - . CDS "cr01.sctg102.wum.2.1"
-Contig102 WU_MERGED intron 2431 2473 . - . CDS "cr01.sctg102.wum.2.1"
-Contig102 WU_MERGED intron 1884 1927 . - . CDS "cr01.sctg102.wum.2.1"
=====================================
Tests/test_GFFSeqIOFeatureAdder.py deleted
=====================================
@@ -1,704 +0,0 @@
-"""Test decoration of existing SeqRecords with GFF through a SeqIO interface.
-"""
-import sys
-import os
-import unittest
-import pprint
-
-import six
-from six import StringIO
-
-from Bio import SeqIO
-from BCBio import GFF
-from Bio.Seq import Seq
-from Bio.SeqRecord import SeqRecord
-from Bio.SeqFeature import SeqFeature, FeatureLocation
-from BCBio.GFF import (GFFExaminer, GFFParser, DiscoGFFParser)
-
-
-class MapReduceGFFTest(unittest.TestCase):
- """Tests GFF parsing using a map-reduce framework for parallelization.
- """
-
- def setUp(self):
- self._test_dir = os.path.join(os.path.dirname(__file__), "GFF")
- self._test_gff_file = os.path.join(self._test_dir, "c_elegans_WS199_shortened_gff.txt")
- self._disco_host = "http://localhost:7000"
-
- def t_local_map_reduce(self):
- """General map reduce framework without parallelization.
- """
- cds_limit_info = dict(gff_type=["gene", "mRNA", "CDS"], gff_id=['I'])
- rec_dict = SeqIO.to_dict(GFF.parse(self._test_gff_file, limit_info=cds_limit_info))
- test_rec = rec_dict['I']
- assert len(test_rec.features) == 32
-
- def t_disco_map_reduce(self):
- """Map reduce framework parallelized using disco.
- """
- # this needs to be more generalized but fails okay with no disco
- try:
- import disco
- import simplejson
- except ImportError:
- print("Skipping -- disco and json not found")
- return
- cds_limit_info = dict(
- gff_source_type=[('Non_coding_transcript', 'gene'), ('Coding_transcript', 'gene'),
- ('Coding_transcript', 'mRNA'), ('Coding_transcript', 'CDS')],
- gff_id=['I']
- )
- parser = DiscoGFFParser(disco_host=self._disco_host)
- rec_dict = SeqIO.to_dict(parser.parse(self._test_gff_file, limit_info=cds_limit_info))
- final_rec = rec_dict['I']
- # second gene feature is multi-parent
- assert len(final_rec.features) == 2 # two gene feature
-
-
-class GFF3Test(unittest.TestCase):
- """Real live GFF3 tests from WormBase and NCBI.
-
- Uses GFF3 data from:
-
- ftp://ftp.wormbase.org/pub/wormbase/genomes/c_elegans/
- genome_feature_tables/GFF3/
- ftp://ftp.wormbase.org/pub/wormbase/genomes/c_elegans/sequences/dna/
-
- and from NCBI.
- """
-
- def setUp(self):
- self._test_dir = os.path.join(os.path.dirname(__file__), "GFF")
- self._test_seq_file = os.path.join(self._test_dir, "c_elegans_WS199_dna_shortened.fa")
- self._test_gff_file = os.path.join(self._test_dir, "c_elegans_WS199_shortened_gff.txt")
- self._test_gff_ann_file = os.path.join(self._test_dir, "c_elegans_WS199_ann_gff.txt")
- self._full_dir = "/usr/home/chapmanb/mgh/ruvkun_rnai/wormbase/" + \
- "data_files_WS198"
- self._test_ncbi = os.path.join(self._test_dir, "ncbi_gff3.txt")
-
- def not_t_full_celegans(self):
- """Test the full C elegans chromosome and GFF files.
-
- This is used to test GFF on large files and is not run as a standard
- test. You will need to download the files and adjust the paths
- to run this.
- """
- # read the sequence information
- seq_file = os.path.join(self._full_dir, "c_elegans.WS199.dna.fa")
- gff_file = os.path.join(self._full_dir, "c_elegans.WS199.gff3")
- seq_handle = open(seq_file)
- seq_dict = SeqIO.to_dict(SeqIO.parse(seq_handle, "fasta"))
- seq_handle.close()
- #with open(gff_file) as gff_handle:
- # possible_limits = feature_adder.available_limits(gff_handle)
- # pprint.pprint(possible_limits)
- rnai_types = [('Orfeome', 'PCR_product'), ('GenePair_STS', 'PCR_product'), ('Promoterome', 'PCR_product')]
- gene_types = [('Non_coding_transcript', 'gene'), ('Coding_transcript', 'gene'), ('Coding_transcript', 'mRNA'),
- ('Coding_transcript', 'CDS')]
- limit_info = dict(gff_source_type=rnai_types + gene_types)
- for rec in GFF.parse(gff_file, seq_dict, limit_info=limit_info):
- pass
-
- def _get_seq_dict(self):
- """Internal reusable function to get the sequence dictionary.
- """
- seq_handle = open(self._test_seq_file)
- seq_dict = SeqIO.to_dict(SeqIO.parse(seq_handle, "fasta"))
- seq_handle.close()
- return seq_dict
-
- def t_possible_limits(self):
- """Calculate possible queries to limit a GFF file.
- """
- gff_examiner = GFFExaminer()
- possible_limits = gff_examiner.available_limits(self._test_gff_file)
- print()
- pprint.pprint(possible_limits)
-
- def t_parent_child(self):
- """Summarize parent-child relationships in a GFF file.
- """
- gff_examiner = GFFExaminer()
- pc_map = gff_examiner.parent_child_map(self._test_gff_file)
- print()
- pprint.pprint(pc_map)
-
- def t_parent_child_file_modes(self):
- """Summarize parent-child relationships in a GFF file.
- """
- gff_examiner = GFFExaminer()
- # Use the loaded-from-filename as reference
- pc_map = gff_examiner.parent_child_map(self._test_gff_file)
-
- with open(self._test_gff_file, "rt") as handle:
- assert pc_map == gff_examiner.parent_child_map(handle)
-
- with open(self._test_gff_file, "rb") as handle:
- if six.PY2:
- assert pc_map == gff_examiner.parent_child_map(handle)
- else:
- try:
- gff_examiner.parent_child_map(handle)
- except TypeError as e:
- assert str(e) == "input handle must be opened in text mode", e
- else:
- assert False, "expected TypeError to be raised"
-
- def t_flat_features(self):
- """Check addition of flat non-nested features to multiple records.
- """
- seq_dict = self._get_seq_dict()
- pcr_limit_info = dict(
- gff_source_type=[('Orfeome', 'PCR_product'), ('GenePair_STS',
- 'PCR_product'), ('Promoterome', 'PCR_product')]
- )
- parser = GFFParser()
- rec_dict = SeqIO.to_dict(parser.parse(self._test_gff_file, seq_dict, limit_info=pcr_limit_info))
- assert len(rec_dict['I'].features) == 4
- assert len(rec_dict['X'].features) == 5
-
- def t_nested_features(self):
- """Check three-deep nesting of features with gene, mRNA and CDS.
- """
- seq_dict = self._get_seq_dict()
- cds_limit_info = dict(
- gff_source_type=[('Coding_transcript', 'gene'), ('Coding_transcript', 'mRNA'),
- ('Coding_transcript', 'CDS')],
- gff_id=['I']
- )
- parser = GFFParser()
- rec_dict = SeqIO.to_dict(parser.parse(self._test_gff_file, seq_dict, limit_info=cds_limit_info))
- final_rec = rec_dict['I']
- # first gene feature is plain
- assert len(final_rec.features) == 2 # two gene feature
- assert len(final_rec.features[0].sub_features) == 1 # one transcript
- # 15 final CDS regions
- assert len(final_rec.features[0].sub_features[0].sub_features) == 15
-
- def t_nested_multiparent_features(self):
- """Verify correct nesting of features with multiple parents.
- """
- seq_dict = self._get_seq_dict()
- cds_limit_info = dict(
- gff_source_type=[('Coding_transcript', 'gene'), ('Coding_transcript', 'mRNA'),
- ('Coding_transcript', 'CDS')],
- gff_id=['I']
- )
- parser = GFFParser()
- rec_dict = SeqIO.to_dict(parser.parse(self._test_gff_file, seq_dict, limit_info=cds_limit_info))
- final_rec = rec_dict['I']
- # second gene feature is multi-parent
- assert len(final_rec.features) == 2 # two gene feature
- cur_subs = final_rec.features[1].sub_features
- assert len(cur_subs) == 3 # three transcripts
- # the first and second transcript have the same CDSs
- assert len(cur_subs[0].sub_features) == 6
- assert len(cur_subs[1].sub_features) == 6
- assert cur_subs[0].sub_features[0] is cur_subs[1].sub_features[0]
-
- def t_no_dict_error(self):
- """Ensure an error is raised when no dictionary to map to is present.
- """
- parser = GFFParser(create_missing=False)
- try:
- for rec in parser.parse(self._test_gff_file):
- pass
- # no error -- problem
- raise AssertionError('Did not complain with missing dictionary')
- except KeyError:
- pass
-
- def t_unknown_seq(self):
- """Prepare unknown base sequences with the correct length.
- """
- rec_dict = SeqIO.to_dict(GFF.parse(self._test_gff_file))
- assert len(rec_dict["I"].seq) == 12766937
- assert len(rec_dict["X"].seq) == 17718531
-
- def t_gff_annotations(self):
- """Check GFF annotations placed on an entire sequence.
- """
- parser = GFFParser()
- rec_dict = SeqIO.to_dict(parser.parse(self._test_gff_ann_file))
- final_rec = rec_dict['I']
- assert len(final_rec.annotations.keys()) == 2
- assert final_rec.annotations['source'] == ['Expr_profile']
- assert final_rec.annotations['expr_profile'] == ['B0019.1']
-
- def t_gff3_iterator(self):
- """Iterated parsing in GFF3 files with nested features.
- """
- parser = GFFParser()
- recs = [r for r in parser.parse_in_parts(self._test_gff_file, target_lines=70)]
- # should be one big set because we don't have a good place to split
- assert len(recs) == 6
- assert len(recs[0].features) == 59
-
- def t_gff3_iterator_limit(self):
- """Iterated interface using a limit query on GFF3 files.
- """
- cds_limit_info = dict(
- gff_source_type=[('Coding_transcript', 'gene'), ('Coding_transcript', 'mRNA'),
- ('Coding_transcript', 'CDS')],
- gff_id=['I']
- )
- parser = GFFParser()
- rec_dict = SeqIO.to_dict(parser.parse(self._test_gff_file, limit_info=cds_limit_info))
- assert len(rec_dict) == 1
- tfeature = rec_dict["I"].features[0].sub_features[0]
- for sub_test in tfeature.sub_features:
- assert sub_test.type == "CDS", sub_test
-
- def t_gff3_noval_attrib(self):
- """Parse GFF3 file from NCBI with a key/value pair with no value.
- """
- parser = GFFParser()
- rec_dict = SeqIO.to_dict(parser.parse(self._test_ncbi))
- assert len(rec_dict) == 1
- t_feature = list(rec_dict.values())[0].features[0]
- assert t_feature.qualifiers["pseudo"] == ["true"]
-
- def t_gff3_multiple_ids(self):
- """Deal with GFF3 with non-unique ID attributes, using NCBI example.
- """
- parser = GFFParser()
- rec_dict = SeqIO.to_dict(parser.parse(self._test_ncbi))
- assert len(rec_dict) == 1
- t_features = list(rec_dict.values())[0].features[1:]
- # 4 feature sets, same ID, different positions, different attributes
- assert len(t_features) == 4
- for f in t_features:
- assert len(f.sub_features) == 3
-
- def t_simple_parsing(self):
- """Parse GFF into a simple line by line dictionary without nesting.
- """
- parser = GFFParser()
- num_lines = 0
- for line_info in parser.parse_simple(self._test_gff_file):
- num_lines += 1
- assert num_lines == 177, num_lines
- line_info = line_info['child'][0]
- assert line_info['quals']['confirmed_est'] == \
- ['yk1055g06.5', 'OSTF085G5_1']
- assert line_info['location'] == [4582718, 4583189]
-
- def t_simple_parsing_nesting(self):
- """Simple parsing for lines with nesting, using the simplified API.
- """
- test_gff = os.path.join(self._test_dir, "transcripts.gff3")
- num_lines = 0
- for line_info in GFF.parse_simple(test_gff):
- num_lines += 1
- assert num_lines == 16, num_lines
-
- def t_extra_comma(self):
- """Correctly handle GFF3 files with extra trailing commas.
- """
- tfile = os.path.join(self._test_dir, "mouse_extra_comma.gff3")
- in_handle = open(tfile)
- for rec in GFF.parse(in_handle):
- pass
- in_handle.close()
- tested = False
- for sub_top in rec.features[0].sub_features:
- for sub in sub_top.sub_features:
- if sub.qualifiers.get("Name", "") == ["CDS:NC_000083.5:LOC100040603"]:
- tested = True
- assert len(sub.qualifiers["Parent"]) == 1
- assert tested, "Did not find sub-feature to test"
-
- def t_novalue_key(self):
- """Handle GFF3 files with keys and no values.
- """
- tfile = os.path.join(self._test_dir, "glimmer_nokeyval.gff3")
- rec = six.next(GFF.parse(tfile))
- f1, f2 = rec.features
- assert f1.qualifiers['ID'] == ['GL0000006']
- assert len(f1.sub_features) == 2
- assert f1.sub_features[0].qualifiers["Lack 3'-end"] == ["true"]
- assert not "ID" in f1.sub_features[0].qualifiers
- assert f2.qualifiers["Complete"] == ["true"]
-
- def t_key_whitespace(self):
- """Fix keys with problematic whitespace.
- """
- tfile = os.path.join(self._test_dir, "spaces.gff3")
- for i, line_info in enumerate(GFF.parse_simple(tfile)):
- if i > 2:
- assert line_info["quals"]["foo"] == ["bar"]
-
- def t_trans_spliicing(self):
- """Parsing of transspliced genes from GFF3 spec where child locations don't match to parents.
- """
- fname = os.path.join(self._test_dir, "trans_splicing.gff3")
- with open(fname) as in_handle:
- rec = six.next(GFF.parse(in_handle))
- assert len(rec.features) == 2
- assert rec.features[0].id == "gene83"
- assert len(rec.features[0].sub_features) == 2
- assert len(rec.features[0].sub_features[0].sub_features) == 7
-
- assert rec.features[1].id == "gene84"
- assert len(rec.features[1].sub_features) == 2
- assert len(rec.features[1].sub_features[0].sub_features) == 7
-
-
-class SolidGFFTester(unittest.TestCase):
- """Test reading output from SOLiD analysis, as GFF3.
-
- See more details on SOLiD GFF here:
-
- http://solidsoftwaretools.com/gf/project/matogff/
- """
-
- def setUp(self):
- self._test_dir = os.path.join(os.path.dirname(__file__), "GFF")
- self._test_gff_file = os.path.join(self._test_dir, "F3-unique-3.v2.gff")
-
- def t_basic_solid_parse(self):
- """Basic parsing of SOLiD GFF results files.
- """
- parser = GFFParser()
- rec_dict = SeqIO.to_dict(parser.parse(self._test_gff_file))
- test_feature = rec_dict['3_341_424_F3'].features[0]
- assert test_feature.location.nofuzzy_start == 102716
- assert test_feature.location.nofuzzy_end == 102736
- assert len(test_feature.qualifiers) == 7
- assert test_feature.qualifiers['score'] == ['10.6']
- assert test_feature.qualifiers['source'] == ['solid']
- assert test_feature.strand == -1
- assert test_feature.type == 'read'
- assert test_feature.qualifiers['g'] == ['T2203031313223113212']
- assert len(test_feature.qualifiers['q']) == 20
-
- def t_solid_iterator(self):
- """Iterated parsing in a flat file without nested features.
- """
- parser = GFFParser()
- feature_sizes = []
- for rec in parser.parse_in_parts(self._test_gff_file, target_lines=5):
- feature_sizes.append(len(rec.features))
- assert len(feature_sizes) == 112
- assert max(feature_sizes) == 1
-
- def t_line_adjust(self):
- """Adjust lines during parsing to fix potential GFF problems.
- """
-
- def adjust_fn(results):
- rec_index = results['quals']['i'][0]
- read_name = results['rec_id']
- results['quals']['read_name'] = [read_name]
- results['rec_id'] = rec_index
- return results
-
- parser = GFFParser(line_adjust_fn=adjust_fn)
- recs = [r for r in parser.parse(self._test_gff_file)]
- assert len(recs) == 1
- work_rec = recs[0]
- assert work_rec.id == '1'
- assert len(work_rec.features) == 112
- assert work_rec.features[0].qualifiers['read_name'] == \
- ['3_336_815_F3']
-
-
-class GFF2Tester(unittest.TestCase):
- """Parse GFF2 and GTF files, building features.
- """
-
- def setUp(self):
- self._test_dir = os.path.join(os.path.dirname(__file__), "GFF")
- self._ensembl_file = os.path.join(self._test_dir, "ensembl_gtf.txt")
- self._wormbase_file = os.path.join(self._test_dir, "wormbase_gff2.txt")
- self._jgi_file = os.path.join(self._test_dir, "jgi_gff2.txt")
- self._wb_alt_file = os.path.join(self._test_dir, "wormbase_gff2_alt.txt")
-
- def t_basic_attributes(self):
- """Parse out basic attributes of GFF2 from Ensembl GTF.
- """
- limit_info = dict(gff_source_type=[('snoRNA', 'exon')])
- rec_dict = SeqIO.to_dict(GFF.parse(self._ensembl_file, limit_info=limit_info))
- work_rec = rec_dict['I']
- assert len(work_rec.features) == 1
- test_feature = work_rec.features[0]
- qual_keys = list(test_feature.qualifiers.keys())
- qual_keys.sort()
- assert qual_keys == [
- 'Parent', 'exon_number', 'gene_id', 'gene_name', 'source', 'transcript_id', 'transcript_name'
- ]
- assert test_feature.qualifiers['source'] == ['snoRNA']
- assert test_feature.qualifiers['transcript_name'] == ['NR_001477.2']
- assert test_feature.qualifiers['exon_number'] == ['1']
-
- def t_tricky_semicolons(self):
- """Parsing of tricky semi-colon positions in WormBase GFF2.
- """
- limit_info = dict(gff_source_type=[('Genomic_canonical', 'region')])
- rec_dict = SeqIO.to_dict(GFF.parse(self._wormbase_file, limit_info=limit_info))
- work_rec = rec_dict['I']
- assert len(work_rec.features) == 1
- test_feature = work_rec.features[0]
- assert test_feature.qualifiers['Note'] == \
- ['Clone cTel33B; Genbank AC199162', 'Clone cTel33B; Genbank AC199162'], test_feature.qualifiers["Note"]
-
- def t_unescaped_semicolons(self):
- """Parse inputs with unescaped semi-colons.
- This is a band-aid to not fail rather than correct parsing, since
- the combined feature will not be maintained.
- """
- f = os.path.join(self._test_dir, "unescaped-semicolon.gff3")
- rec_dict = SeqIO.to_dict(GFF.parse(f))
- f = rec_dict['chr1'].features[0]
- assert f.qualifiers["Description"][0].startswith('osFTL6')
- assert f.qualifiers["Description"][0].endswith('protein, expressed')
-
- def t_jgi_gff(self):
- """Parsing of JGI formatted GFF2, nested using transcriptId and proteinID
- """
- rec_dict = SeqIO.to_dict(GFF.parse(self._jgi_file))
- tfeature = rec_dict['chr_1'].features[0]
- assert tfeature.location.nofuzzy_start == 37060
- assert tfeature.location.nofuzzy_end == 38216
- assert tfeature.type == 'inferred_parent'
- assert len(tfeature.sub_features) == 6
- sfeature = tfeature.sub_features[1]
- assert sfeature.qualifiers['proteinId'] == ['873']
- assert sfeature.qualifiers['phase'] == ['0']
-
- def t_ensembl_nested_features(self):
- """Test nesting of features with GFF2 files using transcript_id.
-
- XXX sub_features no longer supported in Biopython
- """
- rec_dict = SeqIO.to_dict(GFF.parse(self._ensembl_file))
- assert len(rec_dict["I"].features) == 2
- t_feature = rec_dict["I"].features[0]
- #assert len(t_feature.sub_features) == 32, len(t_feature.sub_features)
-
- def t_wormbase_nested_features(self):
- """Test nesting of features with GFF2 files using Transcript only.
- """
- rec_dict = SeqIO.to_dict(GFF.parse(self._wormbase_file))
- assert len(rec_dict) == 3
- parent_features = [f for f in rec_dict["I"].features if f.type == "Transcript"]
- assert len(parent_features) == 1
- inferred_features = [f for f in rec_dict["I"].features if f.type == "inferred_parent"]
- assert len(inferred_features) == 0
- tfeature = parent_features[0]
- assert tfeature.qualifiers["WormPep"][0] == "WP:CE40797"
- assert len(tfeature.sub_features) == 46
-
- def t_wb_cds_nested_features(self):
- """Nesting of GFF2 features with a flat CDS key value pair.
- """
- rec_dict = SeqIO.to_dict(GFF.parse(self._wb_alt_file))
- assert len(rec_dict) == 2
- features = list(rec_dict.values())[0].features
- assert len(features) == 1
- tfeature = features[0]
- assert tfeature.id == "cr01.sctg102.wum.2.1"
- assert len(tfeature.sub_features) == 7
-
- def t_gff2_iteration(self):
- """Test iterated features with GFF2 files, breaking without parents.
- """
- recs = []
- for rec in GFF.parse(self._wormbase_file, target_lines=15):
- recs.append(rec)
- assert len(recs) == 4
- assert recs[0].features[0].type == 'region'
- assert recs[0].features[1].type == 'SAGE_tag'
- assert len(recs[0].features[2].sub_features) == 29
-
-
-class DirectivesTest(unittest.TestCase):
- """Tests for parsing directives and other meta-data.
- """
-
- def setUp(self):
- self._test_dir = os.path.join(os.path.dirname(__file__), "GFF")
- self._gff_file = os.path.join(self._test_dir, "hybrid1.gff3")
- self._ncbi_gff = os.path.join(self._test_dir, "hybrid2.gff3")
- self._ncbi_fa = os.path.join(self._test_dir, "hybrid2.fa")
- self._problem_seq_region_file = os.path.join(self._test_dir, "problem_sequence_region.gff3")
-
- def t_basic_directives(self):
- """Parse out top level meta-data supplied in a GFF3 file.
- """
- recs = SeqIO.to_dict(GFF.parse(self._gff_file))
- anns = recs['chr17'].annotations
- assert anns['gff-version'] == ['3']
- assert anns['attribute-ontology'] == ['baz']
- assert anns['feature-ontology'] == ['bar']
- assert anns['source-ontology'] == ['boo']
- assert anns['sequence-region'] == [('foo', 0, 100), ('chr17', 62467933, 62469545)]
-
- def t_fasta_directive(self):
- """Parse FASTA sequence information contained in a GFF3 file.
- """
- recs = SeqIO.to_dict(GFF.parse(self._gff_file))
- assert len(recs) == 1
- test_rec = recs['chr17']
- assert str(test_rec.seq) == "GATTACAGATTACA"
-
- def t_fasta_directive_w_ncbi(self):
- """Parse FASTA sequence information contained in a GFF3 file with NCBI style IDs.
- """
- with open(self._ncbi_fa) as seq_handle:
- seq_dict = SeqIO.to_dict(SeqIO.parse(seq_handle, "fasta"))
- recs = SeqIO.to_dict(GFF.parse(self._ncbi_gff, seq_dict))
- assert len(recs) == 1
- test_rec = recs['lcl|chr17']
- assert str(test_rec.seq) == "GATTACAGATTACA"
-
- def t_fasta_directive_w_ncbi_fa(self):
- """Parse FASTA sequence information contained in a separate file with NCBI style IDs.
- """
- recs = SeqIO.to_dict(GFF.parse(self._ncbi_gff))
- assert len(recs) == 1
- test_rec = recs['chr17']
- assert str(test_rec.seq) == "GATTACAGATTACA"
-
- def t_examiner_with_fasta(self):
- """Perform high level examination of files with FASTA directives.
- """
- examiner = GFFExaminer()
- pc_map = examiner.parent_child_map(self._gff_file)
- assert pc_map[('UCSC', 'mRNA')] == [('UCSC', 'CDS')]
- limits = examiner.available_limits(self._gff_file)
- assert list(limits['gff_id'].keys())[0][0] == 'chr17'
- assert sorted(limits['gff_source_type'].keys()) == \
- [('UCSC', 'CDS'), ('UCSC', 'mRNA')]
-
- def t_problem_sequence_region(self):
- """Avoid issues with sequence region directives lacking contigs
- """
- recs = SeqIO.to_dict(GFF.parse(self._problem_seq_region_file))
- anns = recs['1'].annotations
- assert anns['gff-version'] == ['3']
- assert anns['sequence-region'] == [(0, 2482535)]
-
-
-class OutputTest(unittest.TestCase):
- """Tests to write SeqFeatures to GFF3 output format.
- """
-
- def setUp(self):
- self._test_dir = os.path.join(os.path.dirname(__file__), "GFF")
- self._test_seq_file = os.path.join(self._test_dir, "c_elegans_WS199_dna_shortened.fa")
- self._test_gff_file = os.path.join(self._test_dir, "c_elegans_WS199_shortened_gff.txt")
- self._test_gff_ann_file = os.path.join(self._test_dir, "c_elegans_WS199_ann_gff.txt")
- self._wormbase_file = os.path.join(self._test_dir, "wormbase_gff2.txt")
-
- def t_gff3_to_gff3(self):
- """Read in and write out GFF3 without any loss of information.
- """
- recs = SeqIO.to_dict(GFF.parse(self._test_gff_file))
- out_handle = StringIO()
- GFF.write(recs.values(), out_handle)
- wrote_handle = StringIO(out_handle.getvalue())
- recs_two = SeqIO.to_dict(GFF.parse(wrote_handle))
-
- orig_rec = list(recs.values())[0]
- re_rec = list(recs.values())[0]
- assert len(orig_rec.features) == len(re_rec.features)
- for i, orig_f in enumerate(orig_rec.features):
- assert str(orig_f) == str(re_rec.features[i])
-
- def t_gff2_to_gff3(self):
- """Read in GFF2 and write out as GFF3.
- """
- recs = SeqIO.to_dict(GFF.parse(self._wormbase_file))
- out_handle = StringIO()
- GFF.write(recs.values(), out_handle)
- wrote_handle = StringIO(out_handle.getvalue())
- # check some tricky lines in the GFF2 file
- checks = 0
- for line in wrote_handle:
- if line.find("Interpolated_map_position") >= 0:
- checks += 1
- assert line.find("RFLP=No") > 0
- if line.find("Gene=WBGene00000138") > 0:
- checks += 1
- assert line.find("ID=B0019.1") > 0
- if line.find("translated_nucleotide_match\t12762127") > 0:
- checks += 1
- assert line.find("Note=MSP:FADFSPLDVSDVNFATDDLAK") > 0
- assert checks == 3, "Missing check line"
-
- def t_write_from_recs(self):
- """Write out GFF3 from SeqRecord inputs.
- """
- seq = Seq("GATCGATCGATCGATCGATC")
- rec = SeqRecord(seq, "ID1")
- qualifiers = {"source": "prediction", "score": 10.0, "other": ["Some", "annotations"], "ID": "gene1"}
- sub_qualifiers = {"source": "prediction"}
- top_feature = SeqFeature(FeatureLocation(0, 20), type="gene", strand=1, qualifiers=qualifiers)
- top_feature.sub_features = [
- SeqFeature(FeatureLocation(0, 5), type="exon", strand=1, qualifiers=sub_qualifiers),
- SeqFeature(FeatureLocation(15, 20), type="exon", strand=1, qualifiers=sub_qualifiers)
- ]
- rec.features = [top_feature]
- out_handle = StringIO()
- GFF.write([rec], out_handle)
- wrote_info = out_handle.getvalue().split("\n")
- assert wrote_info[0] == "##gff-version 3"
- assert wrote_info[1] == "##sequence-region ID1 1 20"
- print(wrote_info[2].split("\t"))
- assert wrote_info[2].split("\t") == [
- 'ID1', 'prediction', 'gene', '1', '20', '10.0', '+', '.', 'ID=gene1;other=Some,annotations'
- ]
- assert wrote_info[3].split("\t") == ['ID1', 'prediction', 'exon', '1', '5', '.', '+', '.', 'Parent=gene1']
-
- def t_write_fasta(self):
- """Include FASTA records in GFF output.
- """
- seq = Seq("GATCGATCGATCGATCGATC")
- rec = SeqRecord(seq, "ID1")
- qualifiers = {"source": "prediction", "score": 10.0, "other": ["Some", "annotations"], "ID": "gene1"}
- rec.features = [SeqFeature(FeatureLocation(0, 20), type="gene", strand=1, qualifiers=qualifiers)]
- out_handle = StringIO()
- GFF.write([rec], out_handle, include_fasta=True)
- wrote_info = out_handle.getvalue().split("\n")
- fasta_parts = wrote_info[3:]
- assert fasta_parts[0] == "##FASTA"
- assert fasta_parts[1] == ">ID1 <unknown description>"
- assert fasta_parts[2] == str(seq)
-
- def t_write_seqrecord(self):
- """Write single SeqRecords.
- """
- seq = Seq("GATCGATCGATCGATCGATC")
- rec = SeqRecord(seq, "ID1")
- qualifiers = {"source": "prediction", "score": 10.0, "other": ["Some", "annotations"], "ID": "gene1"}
- rec.features = [SeqFeature(FeatureLocation(0, 20), type="gene", strand=1, qualifiers=qualifiers)]
- out_handle = StringIO()
- GFF.write([rec], out_handle, include_fasta=True)
- wrote_info = out_handle.getvalue().split("\n")
- gff_line = wrote_info[2]
- assert gff_line.split("\t")[0] == "ID1"
-
-
-def run_tests(argv):
- test_suite = testing_suite()
- runner = unittest.TextTestRunner(sys.stdout, verbosity=2)
- runner.run(test_suite)
-
-
-def testing_suite():
- """Generate the suite of tests.
- """
- test_suite = unittest.TestSuite()
- test_loader = unittest.TestLoader()
- test_loader.testMethodPrefix = 't_'
- tests = [GFF3Test, MapReduceGFFTest, SolidGFFTester, GFF2Tester, DirectivesTest, OutputTest]
- #tests = [GFF3Test]
- for test in tests:
- cur_suite = test_loader.loadTestsFromTestCase(test)
- test_suite.addTest(cur_suite)
- return test_suite
-
-
-if __name__ == "__main__":
- sys.exit(run_tests(sys.argv))
=====================================
bcbio_gff.egg-info/PKG-INFO
=====================================
@@ -1,10 +1,9 @@
-Metadata-Version: 1.0
+Metadata-Version: 2.1
Name: bcbio-gff
-Version: 0.6.9
+Version: 0.7.0
Summary: Read and write Generic Feature Format (GFF) with Biopython integration.
Home-page: https://github.com/chapmanb/bcbb/tree/master/gff
Author: Brad Chapman
Author-email: chapmanb at 50mail.com
License: Biopython License
-Description: UNKNOWN
-Platform: UNKNOWN
+License-File: LICENSE
=====================================
bcbio_gff.egg-info/SOURCES.txt
=====================================
@@ -8,31 +8,6 @@ BCBio/GFF/GFFOutput.py
BCBio/GFF/GFFParser.py
BCBio/GFF/__init__.py
BCBio/GFF/_utils.py
-Scripts/gff/access_gff_index.py
-Scripts/gff/genbank_to_gff.py
-Scripts/gff/gff2_to_gff3.py
-Scripts/gff/gff_to_biosql.py
-Scripts/gff/gff_to_genbank.py
-Tests/test_GFFSeqIOFeatureAdder.py
-Tests/GFF/F3-unique-3.v2.gff
-Tests/GFF/c_elegans_WS199_ann_gff.txt
-Tests/GFF/c_elegans_WS199_dna_shortened.fa
-Tests/GFF/c_elegans_WS199_shortened_gff.txt
-Tests/GFF/ensembl_gtf.txt
-Tests/GFF/glimmer_nokeyval.gff3
-Tests/GFF/hybrid1.gff3
-Tests/GFF/hybrid2.fa
-Tests/GFF/hybrid2.gff3
-Tests/GFF/jgi_gff2.txt
-Tests/GFF/mouse_extra_comma.gff3
-Tests/GFF/ncbi_gff3.txt
-Tests/GFF/problem_sequence_region.gff3
-Tests/GFF/spaces.gff3
-Tests/GFF/trans_splicing.gff3
-Tests/GFF/transcripts.gff3
-Tests/GFF/unescaped-semicolon.gff3
-Tests/GFF/wormbase_gff2.txt
-Tests/GFF/wormbase_gff2_alt.txt
bcbio_gff.egg-info/PKG-INFO
bcbio_gff.egg-info/SOURCES.txt
bcbio_gff.egg-info/dependency_links.txt
View it on GitLab: https://salsa.debian.org/med-team/python-bcbio-gff/-/commit/112984ef376a389973c42d6ece673e459503c410
--
View it on GitLab: https://salsa.debian.org/med-team/python-bcbio-gff/-/commit/112984ef376a389973c42d6ece673e459503c410
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20230712/f9a60157/attachment-0001.htm>
More information about the debian-med-commit mailing list