[med-svn] [Git][med-team/python-bcbio-gff][upstream] New upstream version 0.6.7

Étienne Mollier (@emollier) gitlab at salsa.debian.org
Sat Oct 9 20:40:19 BST 2021



Étienne Mollier pushed to branch upstream at Debian Med / python-bcbio-gff


Commits:
0195e227 by Étienne Mollier at 2021-10-09T21:21:38+02:00
New upstream version 0.6.7
- - - - -


30 changed files:

- BCBio/GFF/GFFParser.py
- BCBio/GFF/__init__.py
- PKG-INFO
- + Scripts/gff/access_gff_index.py
- + Scripts/gff/genbank_to_gff.py
- + Scripts/gff/gff2_to_gff3.py
- + Scripts/gff/gff_to_biosql.py
- + Scripts/gff/gff_to_genbank.py
- + Tests/GFF/F3-unique-3.v2.gff
- + Tests/GFF/c_elegans_WS199_ann_gff.txt
- + Tests/GFF/c_elegans_WS199_dna_shortened.fa
- + Tests/GFF/c_elegans_WS199_shortened_gff.txt
- + Tests/GFF/ensembl_gtf.txt
- + Tests/GFF/glimmer_nokeyval.gff3
- + Tests/GFF/hybrid1.gff3
- + Tests/GFF/jgi_gff2.txt
- + Tests/GFF/mouse_extra_comma.gff3
- + Tests/GFF/ncbi_gff3.txt
- + Tests/GFF/problem_sequence_region.gff3
- + Tests/GFF/spaces.gff3
- + Tests/GFF/trans_splicing.gff3
- + Tests/GFF/transcripts.gff3
- + Tests/GFF/unescaped-semicolon.gff3
- + Tests/GFF/wormbase_gff2.txt
- + Tests/GFF/wormbase_gff2_alt.txt
- + Tests/test_GFFSeqIOFeatureAdder.py
- bcbio_gff.egg-info/PKG-INFO
- bcbio_gff.egg-info/SOURCES.txt
- bcbio_gff.egg-info/requires.txt
- setup.py


Changes:

=====================================
BCBio/GFF/GFFParser.py
=====================================
@@ -19,8 +19,10 @@ import os
 import copy
 import re
 import collections
+import io
 import itertools
 import warnings
+import six
 from six.moves import urllib
 # Make defaultdict compatible with versions of python older than 2.4
 try:
@@ -34,8 +36,10 @@ from Bio.SeqRecord import SeqRecord
 from Bio import SeqFeature
 from Bio import SeqIO
 from Bio import BiopythonDeprecationWarning
+
 warnings.simplefilter("ignore", BiopythonDeprecationWarning)
 
+
 def _gff_line_map(line, params):
     """Map part of Map-Reduce; parses a line of GFF into a dictionary.
 
@@ -46,6 +50,7 @@ def _gff_line_map(line, params):
         - determines the type of attribute (flat, parent, child or annotation)
         - generates a dictionary of GFF info which can be serialized as JSON
     """
+
     def _merge_keyvals(parts):
         """Merge key-values escaped by quotes that are improperly split at semicolons.
         """
@@ -62,6 +67,7 @@ def _gff_line_map(line, params):
         return out
 
     gff3_kw_pat = re.compile("\w+=")
+
     def _split_keyvals(keyval_str):
         """Split key-value pairs in a GFF2, GTF and GFF3 compatible way.
 
@@ -151,15 +157,14 @@ def _gff_line_map(line, params):
                         gff_parts["id"] = gff_parts["quals"][flat_name][0]
                         gff_parts["quals"]["ID"] = [gff_parts["id"]]
                 # children types
-                elif gff_parts["type"] in ["intron", "exon", "three_prime_UTR",
-                        "coding_exon", "five_prime_UTR", "CDS", "stop_codon",
-                        "start_codon"]:
+                elif gff_parts["type"] in ["intron", "exon", "three_prime_UTR", "coding_exon", "five_prime_UTR", "CDS",
+                                           "stop_codon", "start_codon"]:
                     gff_parts["quals"]["Parent"] = gff_parts["quals"][flat_name]
                 break
 
         return gff_parts
 
-    strand_map = {'+' : 1, '-' : -1, '?' : None, None: None}
+    strand_map = {'+': 1, '-': -1, '?': None, None: None}
     line = line.strip()
     if line[:2] == "##":
         return [('directive', line[2:])]
@@ -168,8 +173,7 @@ def _gff_line_map(line, params):
         should_do = True
         if params.limit_info:
             for limit_name, limit_values in params.limit_info.items():
-                cur_id = tuple([parts[i] for i in 
-                    params.filter_info[limit_name]])
+                cur_id = tuple([parts[i] for i in params.filter_info[limit_name]])
                 if cur_id not in limit_values:
                     should_do = False
                     break
@@ -200,8 +204,7 @@ def _gff_line_map(line, params):
             gff_info['rec_id'] = gff_parts[0]
             # if we are describing a location, then we are a feature
             if gff_parts[3] and gff_parts[4]:
-                gff_info['location'] = [int(gff_parts[3]) - 1,
-                        int(gff_parts[4])]
+                gff_info['location'] = [int(gff_parts[3]) - 1, int(gff_parts[4])]
                 gff_info['type'] = gff_parts[2]
                 gff_info['id'] = quals.get('ID', [''])[0]
                 gff_info['strand'] = strand_map.get(gff_parts[6], None)
@@ -232,6 +235,7 @@ def _gff_line_map(line, params):
                 return [(final_key, gff_info)]
     return []
 
+
 def _gff_line_reduce(map_results, out, params):
     """Reduce part of Map-Reduce; combines results of parsed features.
     """
@@ -248,6 +252,7 @@ def _gff_line_reduce(map_results, out, params):
             vals = simplejson.dumps(vals)
         out.add(key, vals)
 
+
 class _MultiIDRemapper:
     """Provide an ID remapping for cases where a parent has a non-unique ID.
 
@@ -255,6 +260,7 @@ class _MultiIDRemapper:
     by using the unique sequence region to assign children to the right
     parent.
     """
+
     def __init__(self, base_id, all_parents):
         self._base_id = base_id
         self._parents = all_parents
@@ -271,9 +277,11 @@ class _MultiIDRemapper:
         # if we haven't found a location match but parents are umabiguous, return that
         if len(self._parents) == 1:
             return self._base_id
-        raise ValueError("Did not find remapped ID location: %s, %s, %s" % (
-                self._base_id, [p['location'] for p in self._parents],
-                feature_dict['location']))
+        raise ValueError(
+            "Did not find remapped ID location: %s, %s, %s" %
+            (self._base_id, [p['location'] for p in self._parents], feature_dict['location'])
+        )
+
 
 class _AbstractMapReduceGFF:
     """Base class providing general GFF parsing for local and remote classes.
@@ -283,6 +291,7 @@ class _AbstractMapReduceGFF:
     the _gff_process function, which returns a dictionary of SeqRecord
     information.
     """
+
     def __init__(self, create_missing=True):
         """Initialize GFF parser 
 
@@ -311,8 +320,7 @@ class _AbstractMapReduceGFF:
         for rec in self.parse_in_parts(gff_files, base_dict, limit_info):
             yield rec
 
-    def parse_in_parts(self, gff_files, base_dict=None, limit_info=None,
-            target_lines=None):
+    def parse_in_parts(self, gff_files, base_dict=None, limit_info=None, target_lines=None):
         """Parse a region of a GFF file specified, returning info as generated.
 
         target_lines -- The number of lines in the file which should be used
@@ -363,8 +371,7 @@ class _AbstractMapReduceGFF:
         base = self._add_annotations(base, results.get('annotation', []))
         for feature in results.get('feature', []):
             (_, base) = self._add_toplevel_feature(base, feature)
-        base = self._add_parent_child_features(base, results.get('parent', []),
-                results.get('child', []))
+        base = self._add_parent_child_features(base, results.get('parent', []), results.get('child', []))
         base = self._add_seqs(base, results.get('fasta', []))
         base = self._add_directives(base, results.get('directive', []))
         return base
@@ -384,8 +391,11 @@ class _AbstractMapReduceGFF:
                 else:
                     val = tuple(parts[1:])
                 # specific directives that need special handling
-                if key == "sequence-region": # convert to Python 0-based coordinates
-                    val = (val[0], int(val[1]) - 1, int(val[2]))
+                if key == "sequence-region":  # convert to Python 0-based coordinates
+                    if len(val) == 2:  # handle regions missing contig
+                        val = (int(val[0]) - 1, int(val[1]))
+                    elif len(val) == 3:
+                        val = (val[0], int(val[1]) - 1, int(val[2]))
                 dir_keyvals[key].append(val)
         for key, vals in dir_keyvals.items():
             for rec in base.values():
@@ -414,18 +424,15 @@ class _AbstractMapReduceGFF:
                 if pid in multi_remap:
                     pid = multi_remap[pid].remap_id(child_dict)
                     child_feature.qualifiers['Parent'][pindex] = pid
-                children_prep[pid].append((child_dict['rec_id'],
-                                           child_feature))
+                children_prep[pid].append((child_dict['rec_id'], child_feature))
         children = dict(children_prep)
         # add children to parents that exist
         for cur_parent_dict in parents:
             cur_id = cur_parent_dict['id']
             if cur_id in multi_remap:
-                cur_parent_dict['id'] = multi_remap[cur_id].remap_id(
-                        cur_parent_dict)
+                cur_parent_dict['id'] = multi_remap[cur_id].remap_id(cur_parent_dict)
             cur_parent, base = self._add_toplevel_feature(base, cur_parent_dict)
-            cur_parent, children = self._add_children_to_parent(cur_parent,
-                                                                children)
+            cur_parent, children = self._add_children_to_parent(cur_parent, children)
         # create parents for children without them (GFF2 or split/bad files)
         while len(children) > 0:
             parent_id, cur_children = next(itertools.islice(children.items(), 1))
@@ -433,15 +440,12 @@ class _AbstractMapReduceGFF:
             if len(cur_children) == 1:
                 rec_id, child = cur_children[0]
                 loc = (child.location.nofuzzy_start, child.location.nofuzzy_end)
-                rec, base = self._get_rec(base,
-                                          dict(rec_id=rec_id, location=loc))
+                rec, base = self._get_rec(base, dict(rec_id=rec_id, location=loc))
                 rec.features.append(child)
                 del children[parent_id]
             else:
-                cur_parent, base = self._add_missing_parent(base, parent_id,
-                                                            cur_children)
-                cur_parent, children = self._add_children_to_parent(cur_parent,
-                                                                    children)
+                cur_parent, base = self._add_missing_parent(base, parent_id, cur_children)
+                cur_parent, children = self._add_children_to_parent(cur_parent, children)
         return base
 
     def _identify_dup_ids(self, parents):
@@ -454,8 +458,7 @@ class _AbstractMapReduceGFF:
         multi_ids = collections.defaultdict(list)
         for parent in parents:
             multi_ids[parent['id']].append(parent)
-        multi_ids = [(mid, ps) for (mid, ps) in multi_ids.items()
-                     if len(parents) > 1]
+        multi_ids = [(mid, ps) for (mid, ps) in multi_ids.items() if len(parents) > 1]
         multi_remap = dict()
         for mid, parents in multi_ids:
             multi_remap[mid] = _MultiIDRemapper(mid, parents)
@@ -525,13 +528,15 @@ class _AbstractMapReduceGFF:
         child_strands = list(set(c[1].strand for c in cur_children))
         inferred_strand = child_strands[0] if len(child_strands) == 1 else None
         assert len(base_rec_id) > 0
-        feature_dict = dict(id=parent_id, strand=inferred_strand,
-                            type="inferred_parent", quals=dict(ID=[parent_id]),
-                            rec_id=base_rec_id[0])
-        coords = [(c.location.nofuzzy_start, c.location.nofuzzy_end)
-                  for r, c in cur_children]
-        feature_dict["location"] = (min([c[0] for c in coords]),
-                                    max([c[1] for c in coords]))
+        feature_dict = dict(
+            id=parent_id,
+            strand=inferred_strand,
+            type="inferred_parent",
+            quals=dict(ID=[parent_id]),
+            rec_id=base_rec_id[0]
+        )
+        coords = [(c.location.nofuzzy_start, c.location.nofuzzy_end) for r, c in cur_children]
+        feature_dict["location"] = (min([c[0] for c in coords]), max([c[1] for c in coords]))
         return self._add_toplevel_feature(base, feature_dict)
 
     def _add_toplevel_feature(self, base, feature_dict):
@@ -546,8 +551,9 @@ class _AbstractMapReduceGFF:
         """Retrieve a Biopython feature from our dictionary representation.
         """
         location = SeqFeature.FeatureLocation(*feature_dict['location'])
-        new_feature = SeqFeature.SeqFeature(location, feature_dict['type'],
-                id=feature_dict['id'], strand=feature_dict['strand'])
+        new_feature = SeqFeature.SeqFeature(
+            location, feature_dict['type'], id=feature_dict['id'], strand=feature_dict['strand']
+        )
         # Support for Biopython 1.68 and above, which removed sub_features
         if not hasattr(new_feature, "sub_features"):
             new_feature.sub_features = []
@@ -559,9 +565,11 @@ class _AbstractMapReduceGFF:
         """
         return list(SeqIO.parse(in_handle, "fasta"))
 
+
 class _GFFParserLocalOut:
     """Provide a collector for local GFF MapReduce file parsing.
     """
+
     def __init__(self, smart_breaks=False):
         self._items = dict()
         self._smart_breaks = smart_breaks
@@ -619,13 +627,15 @@ class _GFFParserLocalOut:
         self._last_parent = None
         return self._items
 
+
 class GFFParser(_AbstractMapReduceGFF):
     """Local GFF parser providing standardized parsing of GFF3 and GFF2 files.
     """
+
     def __init__(self, line_adjust_fn=None, create_missing=True):
         _AbstractMapReduceGFF.__init__(self, create_missing=create_missing)
         self._line_adjust_fn = line_adjust_fn
-    
+
     def _gff_process(self, gff_files, limit_info, target_lines):
         """Process GFF addition without any parallelization.
 
@@ -655,40 +665,46 @@ class GFFParser(_AbstractMapReduceGFF):
             if need_close:
                 in_handle.close()
 
-    def _lines_to_out_info(self, line_iter, limit_info=None,
-            target_lines=None):
+    def _lines_to_out_info(self, line_iter, limit_info=None, target_lines=None):
         """Generate SeqRecord and SeqFeatures from GFF file lines.
         """
         params = self._examiner._get_local_params(limit_info)
-        out_info = _GFFParserLocalOut((target_lines is not None and
-                target_lines > 1))
+        out_info = _GFFParserLocalOut((target_lines is not None and target_lines > 1))
         found_seqs = False
         for line in line_iter:
             results = self._map_fn(line, params)
             if self._line_adjust_fn and results:
                 if results[0][0] not in ['directive']:
-                    results = [(results[0][0],
-                        self._line_adjust_fn(results[0][1]))]
+                    results = [(results[0][0], self._line_adjust_fn(results[0][1]))]
             self._reduce_fn(results, out_info, params)
-            if (target_lines and out_info.num_lines >= target_lines and
-                    out_info.can_break):
+            if (target_lines and out_info.num_lines >= target_lines and out_info.can_break):
                 yield out_info.get_results()
-                out_info = _GFFParserLocalOut((target_lines is not None and
-                        target_lines > 1))
-            if (results and results[0][0] == 'directive' and 
-                    results[0][1] == 'FASTA'):
+                out_info = _GFFParserLocalOut((target_lines is not None and target_lines > 1))
+            if (results and results[0][0] == 'directive' and results[0][1] == 'FASTA'):
                 found_seqs = True
                 break
 
         class FakeHandle:
+
             def __init__(self, line_iter):
                 self._iter = line_iter
+
             def __iter__(self):
                 return self
+
             def __next__(self):
                 return next(self._iter)
-            def read(self):
-                return "".join(l for l in self._iter)
+
+            next = __next__
+
+            def read(self, size=-1):
+                if size < 0:
+                    return "".join(l for l in self._iter)
+                elif size == 0:
+                    return ""  # Used by Biopython to sniff unicode vs bytes
+                else:
+                    raise NotImplementedError
+
             def readline(self):
                 try:
                     return next(self._iter)
@@ -701,9 +717,11 @@ class GFFParser(_AbstractMapReduceGFF):
         if out_info.has_items():
             yield out_info.get_results()
 
+
 class DiscoGFFParser(_AbstractMapReduceGFF):
     """GFF Parser with parallelization through Disco (http://discoproject.org.
     """
+
     def __init__(self, disco_host, create_missing=True):
         """Initialize parser.
         
@@ -720,32 +738,36 @@ class DiscoGFFParser(_AbstractMapReduceGFF):
         # make these imports local; only need them when using disco
         import simplejson
         import disco
-        # absolute path names unless they are special disco files 
+        # absolute path names unless they are special disco files
         full_files = []
         for f in gff_files:
             if f.split(":")[0] != "disco":
                 full_files.append(os.path.abspath(f))
             else:
                 full_files.append(f)
-        results = disco.job(self._disco_host, name="gff_reader",
-                input=full_files,
-                params=disco.Params(limit_info=limit_info, jsonify=True,
-                    filter_info=self._examiner._filter_info),
-                required_modules=["simplejson", "collections", "re"],
-                map=self._map_fn, reduce=self._reduce_fn)
+        results = disco.job(
+            self._disco_host,
+            name="gff_reader",
+            input=full_files,
+            params=disco.Params(limit_info=limit_info, jsonify=True, filter_info=self._examiner._filter_info),
+            required_modules=["simplejson", "collections", "re"],
+            map=self._map_fn,
+            reduce=self._reduce_fn
+        )
         processed = dict()
         for out_key, out_val in disco.result_iterator(results):
             processed[out_key] = simplejson.loads(out_val)
         yield processed
 
+
 def parse(gff_files, base_dict=None, limit_info=None, target_lines=None):
     """High level interface to parse GFF files into SeqRecords and SeqFeatures.
     """
     parser = GFFParser()
-    for rec in parser.parse_in_parts(gff_files, base_dict, limit_info,
-            target_lines):
+    for rec in parser.parse_in_parts(gff_files, base_dict, limit_info, target_lines):
         yield rec
 
+
 def parse_simple(gff_files, limit_info=None):
     """Parse GFF files as line by line dictionary of parts.
     """
@@ -756,18 +778,24 @@ def parse_simple(gff_files, limit_info=None):
             yield rec["child"][0]
         elif "parent" in rec:
             yield rec["parent"][0]
+        elif "feature" in rec:
+            yield rec["feature"][0]
         # ignore directive lines
         else:
             assert "directive" in rec
 
+
 def _file_or_handle(fn):
     """Decorator to handle either an input handle or a file.
     """
+
     def _file_or_handle_inside(*args, **kwargs):
         in_file = args[1]
         if hasattr(in_file, "read"):
             need_close = False
             in_handle = in_file
+            if six.PY3 and not isinstance(in_handle, io.TextIOBase):
+                raise TypeError('input handle must be opened in text mode')
         else:
             need_close = True
             in_handle = open(in_file)
@@ -776,8 +804,10 @@ def _file_or_handle(fn):
         if need_close:
             in_handle.close()
         return out
+
     return _file_or_handle_inside
 
+
 class GFFExaminer:
     """Provide high level details about a GFF file to refine parsing.
 
@@ -787,19 +817,22 @@ class GFFExaminer:
     information you need. This class provides high level summary details to
     help in learning.
     """
+
     def __init__(self):
-        self._filter_info = dict(gff_id = [0], gff_source_type = [1, 2],
-                gff_source = [1], gff_type = [2])
-    
+        self._filter_info = dict(gff_id=[0], gff_source_type=[1, 2], gff_source=[1], gff_type=[2])
+
     def _get_local_params(self, limit_info=None):
+
         class _LocalParams:
+
             def __init__(self):
                 self.jsonify = False
+
         params = _LocalParams()
         params.limit_info = limit_info
         params.filter_info = self._filter_info
         return params
-    
+
     @_file_or_handle
     def available_limits(self, gff_handle):
         """Return dictionary information on possible limits for this file.
@@ -856,16 +889,12 @@ class GFFExaminer:
             if line.startswith("##FASTA"):
                 break
             if line.strip() and not line.startswith("#"):
-                line_type, line_info = _gff_line_map(line,
-                        self._get_local_params())[0]
-                if (line_type == 'parent' or (line_type == 'child' and
-                        line_info['id'])):
-                    parent_sts[line_info['id']] = (
-                            line_info['quals'].get('source', [""])[0], line_info['type'])
+                line_type, line_info = _gff_line_map(line, self._get_local_params())[0]
+                if (line_type == 'parent' or (line_type == 'child' and line_info['id'])):
+                    parent_sts[line_info['id']] = (line_info['quals'].get('source', [""])[0], line_info['type'])
                 if line_type == 'child':
                     for parent_id in line_info['quals']['Parent']:
-                        child_sts[parent_id].append((
-                            line_info['quals'].get('source', [""])[0], line_info['type']))
+                        child_sts[parent_id].append((line_info['quals'].get('source', [""])[0], line_info['type']))
         #print parent_sts, child_sts
         # generate a dictionary of the unique final type relationships
         pc_map = collections.defaultdict(list)


=====================================
BCBio/GFF/__init__.py
=====================================
@@ -3,4 +3,4 @@
 from BCBio.GFF.GFFParser import GFFParser, DiscoGFFParser, GFFExaminer, parse, parse_simple
 from BCBio.GFF.GFFOutput import GFF3Writer, write
 
-__version__="0.6.6"
+__version__ = "0.6.7"


=====================================
PKG-INFO
=====================================
@@ -1,10 +1,10 @@
 Metadata-Version: 1.0
 Name: bcbio-gff
-Version: 0.6.6
+Version: 0.6.7
 Summary: Read and write Generic Feature Format (GFF) with Biopython integration.
 Home-page: https://github.com/chapmanb/bcbb/tree/master/gff
 Author: Brad Chapman
 Author-email: chapmanb at 50mail.com
-License: UNKNOWN
+License: Biopython License
 Description: UNKNOWN
 Platform: UNKNOWN


=====================================
Scripts/gff/access_gff_index.py
=====================================
@@ -0,0 +1,98 @@
+"""Access a GFF file using bx-python's interval indexing.
+
+Requires:
+    bx-python: http://bitbucket.org/james_taylor/bx-python/wiki/Home
+    gff library: http://github.com/chapmanb/bcbb/tree/master/gff
+
+Index time:
+  44 Mb file
+  11 seconds
+  Index is 7.5Mb
+"""
+from __future__ import with_statement
+import os
+import sys
+
+from bx import interval_index_file
+
+from BCBio import GFF
+
+def main(gff_file):
+    gff_index = gff_file + ".index"
+    if not os.path.exists(gff_index):
+        print "Indexing GFF file"
+        index(gff_file)
+    index = GFFIndexedAccess(gff_file, keep_open=True)
+    print index.seqids
+    print
+    for feature in index.get_features_in_region("Chr2", 17500, 20000):
+        print feature
+    for feature in index.get_features_in_region("Chr5", 500000, 502500):
+        print feature
+
+    exam = GFF.GFFExaminer()
+    #print exam.available_limits(gff_file)
+    #print exam.parent_child_map(gff_file)
+
+    found = 0
+    limit_info = dict(
+            gff_type = ["protein", "gene", "mRNA", "exon", "CDS", "five_prime_UTR",
+                "three_prime_UTR"]
+            )
+    for feature in index.get_features_in_region("Chr1", 0, 50000, 
+            limit_info):
+        found += 1
+    print found
+
+class GFFIndexedAccess(interval_index_file.AbstractIndexedAccess):
+    """Provide indexed access to a GFF file.
+    """
+    def __init__(self, *args, **kwargs):
+        interval_index_file.AbstractIndexedAccess.__init__(self, *args,
+                **kwargs)
+        self._parser = GFF.GFFParser()
+
+    @property
+    def seqids(self):
+        return self.indexes.indexes.keys()
+
+    def get_features_in_region(self, seqid, start, end, limit_info=None):
+        """Retrieve features located on a given region in start/end coordinates.
+        """
+        limit_info = self._parser._normalize_limit_info(limit_info)
+        line_gen = self.get_as_iterator(seqid, int(start), int(end))
+        recs = None
+        for results in self._parser._lines_to_out_info(line_gen, limit_info):
+            assert not recs, "Unexpected multiple results"
+            recs = self._parser._results_to_features(dict(), results)
+        if recs is None:
+            return []
+        else:
+            assert len(recs) == 1
+            rec = recs[seqid]
+            return rec.features
+
+    def read_at_current_offset(self, handle, **kwargs):
+        line = handle.readline()
+        return line
+
+def index(gff_file, index_file=None):
+    index = interval_index_file.Indexes()
+    with open(gff_file) as in_handle:
+        while 1:
+            pos = in_handle.tell()
+            line = in_handle.readline()
+            if not line:
+                break
+            if not line.startswith("#"):
+                parts = line.split("\t")
+                (seqid, gtype, source, start, end) = parts[:5]
+                index.add(seqid, int(start), int(end), pos)
+    if index_file is None:
+        index_file = gff_file + ".index"
+    with open(index_file, "w") as index_handle:
+        index.write(index_handle)
+    return index_file
+
+if __name__ == "__main__":
+    main(*sys.argv[1:])


=====================================
Scripts/gff/genbank_to_gff.py
=====================================
@@ -0,0 +1,21 @@
+#!/usr/bin/env python
+"""Convert a GenBank file into GFF format.
+
+Usage:
+    genbank_to_gff.py <genbank_file>
+"""
+import sys
+import os
+
+from Bio import SeqIO
+from Bio import Seq
+
+from BCBio import GFF
+
+def main(gb_file):
+    out_file = "%s.gff" % os.path.splitext(gb_file)[0]
+    with open(out_file, "w") as out_handle:
+        GFF.write(SeqIO.parse(gb_file, "genbank"), out_handle)
+
+if __name__ == "__main__":
+    main(*sys.argv[1:])


=====================================
Scripts/gff/gff2_to_gff3.py
=====================================
@@ -0,0 +1,30 @@
+#!/usr/bin/env python
+"""Convert a GFF2 file to an updated GFF3 format file.
+
+Usage:
+    gff2_to_gff3.py <in_gff2_file>
+
+The output file has the same name with the extension gff3.
+"""
+import sys
+import os
+
+from BCBio.GFF import GFFParser, GFF3Writer
+
+def main(in_file):
+    base, ext = os.path.splitext(in_file)
+    out_file = "%s.gff3" % (base)
+    in_handle = open(in_file)
+    out_handle = open(out_file, "w")
+    reader = GFFParser()
+    writer = GFF3Writer()
+    writer.write(reader.parse_in_parts(in_handle, target_lines=25000),
+            out_handle)
+    in_handle.close()
+    out_handle.close()
+
+if __name__ == "__main__":
+    if len(sys.argv) != 2:
+        print __doc__
+        sys.exit()
+    main(sys.argv[1])


=====================================
Scripts/gff/gff_to_biosql.py
=====================================
@@ -0,0 +1,76 @@
+#!/usr/bin/env python
+"""Load a fasta file of sequences and associated GFF file into BioSQL.
+
+You will need to adjust the database parameters and have a BioSQL database set
+up. See:
+
+http://biopython.org/wiki/BioSQL
+
+Depending on the size of the sequences being loaded, you may also get errors on
+loading very large chromosome sequences. Updating these options can help:
+
+    set global max_allowed_packet=1000000000;
+    set global net_buffer_length=1000000;
+
+Usage:
+    gff_to_biosql.py <fasta file> <gff file>
+"""
+from __future__ import with_statement
+import sys
+
+from BioSQL import BioSeqDatabase
+from Bio import SeqIO
+
+from BCBio.GFF import GFFParser
+
+def main(seq_file, gff_file):
+    # -- To be customized
+    # You need to update these parameters to point to your local database
+    # XXX demo example could be swapped to use SQLite when that is integrated
+    user = "chapmanb"
+    passwd = "cdev"
+    host = "localhost"
+    db_name = "wb199_gff"
+    biodb_name = "wb199_gff_cds_pcr"
+    # These need to be updated to reflect what you would like to parse
+    # out of the GFF file. Set limit_info=None to parse everything, but
+    # be sure the file is small or you may deal with memory issues.
+    rnai_types = [('Orfeome', 'PCR_product'),
+                ('GenePair_STS', 'PCR_product'),
+                ('Promoterome', 'PCR_product')]
+    gene_types = [('Non_coding_transcript', 'gene'),
+                  ('Coding_transcript', 'gene'),
+                  ('Coding_transcript', 'mRNA'),
+                  ('Coding_transcript', 'CDS')]
+    limit_info = dict(gff_source_type = rnai_types + gene_types)
+    # --
+    print "Parsing FASTA sequence file..."
+    with open(seq_file) as seq_handle:
+        seq_dict = SeqIO.to_dict(SeqIO.parse(seq_handle, "fasta"))
+
+    print "Parsing GFF data file..."
+    parser = GFFParser()
+    recs = parser.parse(gff_file, seq_dict, limit_info=limit_info)
+
+    print "Writing to BioSQL database..."
+    server = BioSeqDatabase.open_database(driver="MySQLdb", user=user,
+            passwd=passwd, host=host, db=db_name)
+    try:
+        if biodb_name not in server.keys():
+            server.new_database(biodb_name)
+        else:
+            server.remove_database(biodb_name)
+            server.adaptor.commit()
+            server.new_database(biodb_name)
+        db = server[biodb_name]
+        db.load(recs)
+        server.adaptor.commit()
+    except:
+        server.adaptor.rollback()
+        raise
+
+if __name__ == "__main__":
+    if len(sys.argv) != 3:
+        print __doc__
+        sys.exit()
+    main(sys.argv[1], sys.argv[2])


=====================================
Scripts/gff/gff_to_genbank.py
=====================================
@@ -0,0 +1,73 @@
+#!/usr/bin/env python
+"""Convert a GFF and associated FASTA file into GenBank format.
+
+Usage:
+    gff_to_genbank.py <GFF annotation file> [<FASTA sequence file> <molecule type>]
+
+ FASTA sequence file: input sequences matching records in GFF. Optional if sequences
+   are in the GFF
+ molecule type: type of molecule in the GFF file. Defaults to DNA, the most common case.
+"""
+from __future__ import print_function
+
+import sys
+import os
+
+from Bio import SeqIO
+
+from BCBio import GFF
+
+
+def main(gff_file, fasta_file=None, molecule_type="DNA"):
+    """Convert a GFF file into a GenBank file.
+
+    The output path is gff_file with its extension replaced by ".gb".
+    Sequences are read from fasta_file when one is given; otherwise the
+    GFF is parsed against an empty sequence dictionary. molecule_type is
+    handed to _check_gff, which applies it to records lacking that
+    annotation.
+    """
+    base_name, _ext = os.path.splitext(gff_file)
+    genbank_path = "%s.gb" % base_name
+    # No FASTA given: parse against an empty sequence dictionary.
+    sequences = {}
+    if fasta_file:
+        sequences = SeqIO.to_dict(SeqIO.parse(fasta_file, "fasta"))
+    records = GFF.parse(gff_file, sequences)
+    records = _fix_ncbi_id(records)
+    records = _check_gff(records, molecule_type)
+    SeqIO.write(records, genbank_path, "genbank")
+
+
+def _fix_ncbi_id(fasta_iter):
+    """Shorten long pipe-delimited NCBI record names for GenBank output.
+
+    GenBank identifiers can only be 16 characters. Any record whose name
+    is longer than that and contains a "|" after its first character has
+    both its id and name replaced by the last non-empty "|"-separated
+    field of the name; a warning is printed for each change. Every
+    record is yielded, modified or not.
+    """
+    max_name_length = 16
+    for record in fasta_iter:
+        name = record.name
+        # find() > 0: there is a pipe and it is not the leading character.
+        if len(name) > max_name_length and name.find("|") > 0:
+            fields = list(filter(None, name.split("|")))
+            short_name = fields[-1]
+            print("Warning: shortening NCBI name %s to %s" % (record.id, short_name))
+            record.id = short_name
+            record.name = short_name
+        yield record
+
+
+def _check_gff(gff_iterator, molecule_type):
+    """Prepare parsed GFF records before they are fed to SeqIO.
+
+    Each record that has no "molecule_type" annotation is given the
+    supplied molecule_type (an existing value is left untouched), and
+    the record is then passed through _flatten_features before being
+    yielded.
+    """
+    for record in gff_iterator:
+        # setdefault only writes when the key is absent.
+        record.annotations.setdefault("molecule_type", molecule_type)
+        yield _flatten_features(record)
+
+
+def _flatten_features(rec):
+    """Replace rec.features with a flat list that includes all sub_features.
+
+    GenBank does not handle nested features, so every nested feature is
+    promoted to the top level. Each original top-level feature is
+    followed by its descendants in breadth-first order (children, then
+    grandchildren, and so on) before the next top-level feature. The
+    record is modified in place and returned.
+    """
+    flat = []
+    for top_feature in rec.features:
+        # The tail of `flat` doubles as the breadth-first queue: `position`
+        # walks over entries whose children have not been appended yet.
+        position = len(flat)
+        flat.append(top_feature)
+        while position < len(flat):
+            flat.extend(flat[position].sub_features)
+            position += 1
+    rec.features = flat
+    return rec
+
+
+if __name__ == "__main__":
+    # main() takes the GFF file plus up to two optional arguments (FASTA
+    # file, molecule type). Any other argument count would surface as a
+    # bare TypeError traceback, so print the usage text from the module
+    # docstring and exit with a non-zero status instead.
+    if not 1 <= len(sys.argv) - 1 <= 3:
+        print(__doc__)
+        sys.exit(1)
+    main(*sys.argv[1:])


=====================================
Tests/GFF/F3-unique-3.v2.gff
=====================================
@@ -0,0 +1,128 @@
+##solid-gff-version 0.2
+##gff-version 2
+##source-version MaToGff.java v1.5
+##date 2008-05-28
+##time 13:11:03
+##Type solid_read
+##color-code AA=0,AC=1,AG=2,AT=3,CA=1,CC=0,CG=3,CT=2,GA=2,GC=3,GG=0,GT=1,TA=3,TC=2,TG=1,TT=0
+##primer-base F3=T
+##max-num-mismatches 3
+##max-read-length 20
+##line-order fragment
+##history filter_fasta.pl --noduplicates --output=/data/results/DAEMON/DAEMON_MATE_PAIRS_2_20070326/S1/results.01/primary.20071218094706805 --name=DAEMON_MATE_PAIRS_2_20070326_S1 --tag=F3 --minlength=20 --prefix=T /data/results/DAEMON/DAEMON_MATE_PAIRS_2_20070326/S1/jobs/postPrimerSetPrimary.117/rawseq
+##history map /data/results/RegressionDriver/CaseManager/results/r12/integration/case0002/reads1/test_S1_F3.csfasta /data/results/RegressionDriver/CaseManager/knownData/validatedReference/matchingPipeline/ecoli_k12_MG1655.fasta T=30 L=19 C=1 E=.Tmpfile1211939575SVhDtd F=0 B=1 D=1 u=1 r=0 n=1 Z=1000 P="0000000111111111111" M=0 U=0.000000 H=0  > .Tmpfile1211939575SVhDtd.out.1
+##history MaToGff.java --sort --qvs=test_S1_F3_QV.qual.txt --convert=unique --clear=3 --tempdir=../tmp test_S1_F3.csfasta.ma.20.3
+##hdr seqname	source	feature	start	end	score	strand	frame	[attributes]	[comments]
+3_336_815_F3	solid	read	55409	55428	10.4	+	.	g=A3233312322232122211;i=1;p=1.000;q=23,12,18,17,10,24,19,14,27,9,23,9,16,20,11,7,8,4,4,14;u=0,0,0,1
+3_142_1011_F3	solid	read	91290	91309	5.0	-	.	g=T0330222333132222222;i=1;p=1.000;q=4,4,14,4,4,4,4,21,4,4,4,4,25,4,4,4,5,21,4,4;u=0,0,0,1
+3_341_424_F3	solid	read	102717	102736	10.6	-	.	g=T2203031313223113212;i=1;p=1.000;q=9,27,25,16,18,9,27,26,23,13,14,25,27,5,24,5,26,26,4,5;u=0,0,1
+3_6_37_F3	solid	read	181053	181072	9.4	+	.	g=C3220221332111020310;i=1;p=1.000;q=9,5,13,9,10,22,6,12,21,7,13,4,21,16,23,6,20,20,13,6;u=0,0,0,1
+3_34_202_F3	solid	read	284207	284226	6.9	+	.	g=G0301333332232122333;i=1;p=1.000;q=6,15,21,8,12,4,4,5,12,8,4,12,4,7,10,6,8,16,4,6;u=0,1
+3_277_712_F3	solid	read	304136	304155	11.8	-	.	g=A2033101122223322133;i=1;p=1.000;q=26,11,14,27,4,17,4,26,26,23,17,25,26,27,21,23,5,20,26,23;u=0,1
+3_394_71_F3	solid	read	308736	308755	10.8	+	.	g=T3203322323203312331;i=1;p=1.000;q=9,24,19,15,20,18,20,10,13,13,11,21,12,7,4,11,20,24,4,25;u=0,1
+3_285_1497_F3	solid	read	404055	404074	8.4	-	.	g=T1221231003202232221;i=1;p=1.000;q=8,10,6,25,16,14,23,27,8,14,21,19,5,4,4,6,22,12,4,6;u=0,0,0,1
+3_228_178_F3	solid	read	453227	453246	9.5	-	.	g=G1130333332331110323;i=1;p=1.000;q=4,19,25,18,18,5,19,6,8,24,4,26,21,11,15,4,26,13,13,15;u=0,0,0,1
+3_406_794_F3	solid	read	504835	504854	8.3	-	.	g=T3033331301320201111;i=1;p=1.000;q=27,4,13,4,21,11,7,11,5,26,10,8,9,4,6,18,9,26,17,6;u=0,0,0,1
+3_303_251_F3	solid	read	561501	561520	5.3	+	.	g=C0011111112222112221;i=1;p=1.000;q=9,8,4,4,10,4,4,4,6,14,4,4,4,4,16,4,4,4,4,23;u=0,0,1
+3_152_112_F3	solid	read	624012	624031	7.7	-	.	g=G0301122312213122221;i=1;p=1.000;q=22,14,7,13,18,5,11,4,15,6,6,11,4,8,15,5,10,4,6,24;u=0,0,0,1
+3_112_1154_F3	solid	read	630582	630601	11.3	-	.	g=T1333312011131131011;i=1;p=1.000;q=27,27,4,5,17,24,20,19,7,4,25,17,18,15,22,23,17,25,16,26;u=0,0,1
+3_196_392_F3	solid	read	661664	661683	19.7	-	.	g=T3321013301122133323;i=1;p=1.000;q=27,25,13,26,21,25,23,27,27,27,27,11,16,27,27,19,26,27,26,27;u=1
+3_192_1248_F3	solid	read	672037	672056	4.5	-	.	g=A0333232333121222222;i=1;p=1.000;q=4,7,4,4,4,4,4,4,6,4,4,4,4,4,7,7,4,4,6,4;u=0,0,0,1
+3_63_479_F3	solid	read	742582	742601	7.9	-	.	g=A0133333333233232332;i=1;p=1.000;q=4,9,6,11,20,12,11,9,13,20,18,4,4,14,9,15,4,6,21,4;u=0,0,0,1
+3_30_710_F3	solid	read	816069	816088	9.2	-	.	g=T3311001223313333313;i=1;p=1.000;q=22,27,18,25,25,7,26,25,14,23,6,25,5,11,7,4,15,7,4,6;u=0,0,0,1
+3_284_77_F3	solid	read	864876	864895	7.4	+	.	g=T2003133033233112331;i=1;p=1.000;q=13,19,4,11,22,24,6,16,4,6,13,4,12,18,4,6,7,11,4,5;u=0,0,0,1
+3_411_1040_F3	solid	read	876023	876042	10.9	-	.	g=T2121301233200033221;i=1;p=1.000;q=9,9,5,12,11,8,4,16,27,27,18,21,24,9,18,24,21,9,23,17;u=0,0,0,1
+3_188_171_F3	solid	read	884683	884702	5.8	-	.	g=A1322330132213322231;i=1;p=1.000;q=4,8,4,5,7,6,5,4,11,6,6,11,4,8,4,8,4,6,4,15;u=0,0,0,1
+3_63_787_F3	solid	read	1022149	1022168	7.5	+	.	g=C3131132013020123031;i=1;p=1.000;q=12,13,26,14,9,9,13,14,4,7,8,5,11,4,17,4,4,6,4,21;u=0,1
+3_391_2015_F3	solid	read	1074989	1075008	18.5	-	.	g=A2323101222321232322;i=1;p=1.000;q=27,25,18,20,27,27,24,23,27,23,27,25,19,26,12,26,9,21,27,21;u=1
+3_8_425_F3	solid	read	1119124	1119143	6.7	-	.	g=T0321201132230303323;i=1;p=1.000;q=6,5,8,6,4,4,23,9,12,10,15,4,13,13,8,4,4,5,5,12;u=0,0,1
+3_53_745_F3	solid	read	1130179	1130198	7.6	-	.	g=C0213313233333113321;i=1;p=1.000;q=27,6,9,22,18,9,8,15,6,8,14,5,8,6,16,4,5,4,4,14;u=0,0,0,1
+3_123_576_F3	solid	read	1219122	1219141	8.7	+	.	g=A3333133323333323323;i=1;p=1.000;q=18,22,5,11,16,16,8,14,8,5,19,8,9,10,7,11,6,11,9,4;u=0,0,1
+3_81_12_F3	solid	read	1236732	1236751	8.6	+	.	g=G2210332302233112321;i=1;p=1.000;q=7,16,17,9,7,9,9,16,9,4,10,21,17,8,4,6,9,16,6,12;u=0,0,0,1
+3_96_1862_F3	solid	read	1264409	1264428	6.9	-	.	g=G0301032323231222021;i=1;p=1.000;q=26,23,11,20,15,8,6,4,6,6,9,7,6,4,8,6,4,5,6,5;u=0,0,0,1
+3_40_136_F3	solid	read	1266177	1266196	7.4	-	.	g=T2332222332203312221;i=1;p=1.000;q=9,23,6,19,13,9,4,8,17,9,4,4,13,9,8,5,4,6,10,8;u=0,0,1
+3_124_1781_F3	solid	read	1385416	1385435	10.3	+	.	g=A1322302333332222132;i=1;p=1.000;q=13,17,8,6,5,9,24,4,7,9,18,27,18,16,16,23,18,18,11,23;u=0,0,1
+3_134_1165_F3	solid	read	1393169	1393188	9.0	-	.	g=T3301123202321131311;i=1;p=1.000;q=4,27,18,7,27,4,27,26,4,20,4,27,26,9,27,4,27,14,10,27;u=1
+3_224_587_F3	solid	read	1490044	1490063	6.1	+	.	g=G2032313231111233321;i=1;p=1.000;q=4,4,6,6,13,24,4,4,5,15,6,7,9,14,4,4,4,25,5,5;u=0,0,0,1
+3_25_747_F3	solid	read	1513598	1513617	9.5	+	.	g=T1223213101133121231;i=1;p=1.000;q=26,27,8,27,27,27,26,27,26,19,8,14,4,17,11,5,7,4,7,6;u=0,0,1
+3_143_14_F3	solid	read	1528236	1528255	9.7	+	.	g=T3233113323230202011;i=1;p=1.000;q=13,23,17,19,23,16,24,25,14,15,9,6,4,11,4,9,12,4,16,10;u=0,0,0,1
+3_164_1025_F3	solid	read	1570107	1570126	7.9	-	.	g=T3220332323303320231;i=1;p=1.000;q=7,10,20,8,4,24,4,4,21,6,26,22,9,6,11,9,6,4,17,14;u=0,0,0,1
+3_137_552_F3	solid	read	1630276	1630295	9.1	-	.	g=G3030333223233102131;i=1;p=1.000;q=6,28,9,4,6,26,27,6,10,9,27,21,6,16,9,25,6,7,23,12;u=0,0,0,1
+3_125_1810_F3	solid	read	1634104	1634123	10.5	+	.	g=G1232220322032311332;i=1;p=1.000;q=27,8,26,26,10,6,26,12,27,27,26,4,27,27,23,8,8,4,27,12;u=0,0,0,1
+3_314_1310_F3	solid	read	1639981	1640000	9.2	+	.	g=A2221332230322203033;i=1;p=1.000;q=19,12,6,27,11,27,6,11,5,6,9,13,27,27,8,18,5,22,4,27;u=0,0,0,1
+3_384_591_F3	solid	read	1654341	1654360	6.8	+	.	g=A3323221133121102313;i=1;p=1.000;q=19,8,7,7,15,4,20,7,4,6,14,7,19,6,8,4,5,9,4,4;u=0,0,0,1
+3_145_739_F3	solid	read	1791040	1791059	11.9	-	.	g=A0221223333323131212;i=1;p=1.000;q=20,27,23,13,27,14,27,28,27,25,12,24,8,16,8,4,8,21,9,11;u=0,0,0,1
+3_326_2020_F3	solid	read	1830564	1830583	9.3	+	.	g=A3321322331103233322;i=1;p=1.000;q=14,4,25,16,10,12,16,5,14,10,25,5,25,5,9,18,13,26,4,26;u=0,0,0,1
+3_233_1265_F3	solid	read	1857564	1857583	8.9	+	.	g=T3112113020130223311;i=1;p=1.000;q=7,27,25,26,27,14,26,27,27,27,4,6,5,10,17,4,5,7,6,12;u=0,0,1
+3_235_100_F3	solid	read	1912460	1912479	9.6	-	.	g=G2233020000132311231;i=1;p=1.000;q=23,24,25,16,17,6,21,25,9,4,6,11,8,19,6,6,19,14,13,6;u=0,0,0,1
+3_111_107_F3	solid	read	1944496	1944515	7.6	-	.	g=C3023223333211322231;i=1;p=1.000;q=15,5,6,14,5,13,4,12,11,4,9,9,11,12,4,11,11,13,6,6;u=0,0,0,1
+3_457_1514_F3	solid	read	1956598	1956617	9.9	-	.	g=T0013331013332110221;i=1;p=1.000;q=18,24,10,24,23,25,22,11,20,10,15,11,4,5,27,4,9,13,5,27;u=0,1
+3_183_74_F3	solid	read	1992040	1992059	9.8	+	.	g=C3332233131131222322;i=1;p=1.000;q=27,27,25,23,25,8,11,11,7,11,4,12,14,10,15,7,14,4,9,12;u=0,0,1
+3_357_1303_F3	solid	read	2037917	2037936	10.9	-	.	g=T3331331323320311331;i=1;p=1.000;q=7,27,5,19,26,8,27,12,14,27,8,27,23,9,19,4,26,20,9,27;u=0,0,0,1
+3_153_186_F3	solid	read	2083441	2083460	6.7	+	.	g=T3112233331133323322;i=1;p=1.000;q=7,14,19,7,12,6,11,4,11,8,4,6,6,4,11,4,6,4,4,18;u=0,1
+3_65_1741_F3	solid	read	2107441	2107460	8.4	+	.	g=T3333332330233132123;i=1;p=1.000;q=4,4,6,25,9,4,26,16,21,9,18,15,27,27,4,21,9,7,9,6;u=0,0,0,1
+3_98_323_F3	solid	read	2118821	2118840	7.5	+	.	g=A3222212322131112031;i=1;p=1.000;q=13,14,8,10,8,14,4,13,10,7,15,4,6,4,4,12,6,11,6,8;u=0,0,1
+3_48_258_F3	solid	read	2153882	2153901	9.4	-	.	g=G0330113313201122321;i=1;p=1.000;q=22,15,20,4,16,17,14,24,4,5,4,22,19,8,10,9,13,22,8,15;u=0,0,0,1
+3_140_1125_F3	solid	read	2182909	2182928	7.9	+	.	g=T3231331302232001131;i=1;p=1.000;q=10,4,12,6,4,12,13,6,18,5,8,11,4,26,6,25,5,18,11,12;u=0,0,0,1
+3_359_118_F3	solid	read	2188393	2188412	8.4	+	.	g=A0301311133331131322;i=1;p=1.000;q=11,5,7,13,20,6,6,25,8,18,9,15,27,9,6,7,15,17,4,4;u=0,0,0,1
+3_203_483_F3	solid	read	2272874	2272893	9.1	-	.	g=C3031223110333133311;i=1;p=1.000;q=23,21,25,27,10,5,22,15,17,18,5,18,17,5,19,4,4,13,4,22;u=0,0,0,1
+3_66_301_F3	solid	read	2286038	2286057	6.6	-	.	g=C1113113330132222311;i=1;p=1.000;q=10,4,6,4,8,13,9,4,10,9,4,6,13,9,5,6,11,6,4,9;u=0,0,0,1
+3_78_130_F3	solid	read	2291021	2291040	7.6	+	.	g=G3233131332212222321;i=1;p=1.000;q=13,16,6,12,17,11,10,4,12,8,13,4,8,6,4,4,12,10,4,11;u=0,0,0,1
+3_141_110_F3	solid	read	2291354	2291373	9.3	+	.	g=T1312203322212123321;i=1;p=1.000;q=9,21,24,11,16,4,23,27,16,16,8,22,6,10,16,4,9,4,7,25;u=0,0,1
+3_51_1383_F3	solid	read	2374918	2374937	8.8	+	.	g=T3311203033322222231;i=1;p=1.000;q=24,26,6,27,27,23,27,4,21,27,4,27,6,9,24,4,23,4,4,27;u=0,0,1
+3_231_366_F3	solid	read	2392091	2392110	10.0	-	.	g=T2022333223101331322;i=1;p=1.000;q=18,12,9,9,13,8,7,22,7,7,4,26,12,17,9,20,24,8,18,14;u=0,0,0,1
+3_214_1802_F3	solid	read	2394604	2394623	8.8	-	.	g=T1232111001220211133;i=1;p=1.000;q=17,18,14,6,19,4,21,4,6,12,11,4,26,20,9,18,7,16,5,18;u=0,0,0,1
+3_67_1434_F3	solid	read	2454508	2454527	15.2	-	.	g=T3121311232222231203;i=1;p=1.000;q=9,27,27,18,16,14,25,27,26,21,19,27,27,27,15,5,24,27,24,24;u=0,0,1
+3_124_1647_F3	solid	read	2493617	2493636	7.5	+	.	g=A0211320203220231332;i=1;p=1.000;q=9,12,12,9,6,14,12,7,4,4,12,9,4,9,16,4,4,9,9,16;u=0,0,0,1
+3_39_328_F3	solid	read	2500759	2500778	7.8	+	.	g=T1332333033231132333;i=1;p=1.000;q=24,27,26,26,25,21,7,8,4,5,20,4,11,6,8,4,6,4,11,7;u=0,0,1
+3_378_322_F3	solid	read	2541624	2541643	8.9	+	.	g=T2333331001023011220;i=1;p=1.000;q=14,6,13,25,27,4,24,22,14,19,9,23,15,6,8,4,22,4,4,20;u=0,0,0,1
+3_216_848_F3	solid	read	2550573	2550592	11.5	-	.	g=G2320322020031220322;i=1;p=1.000;q=21,24,8,21,20,25,18,6,24,14,21,9,7,18,8,18,7,9,19,12;u=0,0,0,1
+3_221_516_F3	solid	read	2607559	2607578	11.1	-	.	g=T2132333313222333332;i=1;p=1.000;q=9,19,27,26,24,26,26,25,25,26,21,4,6,10,21,6,20,13,5,24;u=0,0,0,1
+3_56_45_F3	solid	read	2662103	2662122	5.5	+	.	g=G3021122332232122321;i=1;p=1.000;q=4,4,4,6,4,6,4,5,18,9,4,16,10,4,4,4,12,4,6,6;u=0,0,0,1
+3_127_210_F3	solid	read	2798906	2798925	10.2	+	.	g=G2331321333232203222;i=1;p=1.000;q=11,25,9,4,23,16,26,14,7,22,9,25,9,8,21,8,15,17,4,26;u=0,0,1
+3_417_422_F3	solid	read	2812322	2812341	8.8	-	.	g=T3321222333313333132;i=1;p=1.000;q=9,26,7,19,7,13,23,4,25,4,6,19,4,16,15,15,23,4,19,13;u=0,0,0,1
+3_42_1403_F3	solid	read	2830264	2830283	9.6	-	.	g=T3212330132120221212;i=1;p=1.000;q=7,4,25,18,6,17,12,12,17,14,8,26,13,15,10,4,21,5,12,22;u=0,1
+3_457_42_F3	solid	read	2874245	2874264	7.6	-	.	g=G0301123332223122221;i=1;p=1.000;q=18,10,14,9,19,4,10,8,11,10,6,8,5,8,11,4,13,6,4,6;u=0,0,1
+3_361_728_F3	solid	read	2893879	2893898	14.6	+	.	g=C3213223312310132221;i=1;p=1.000;q=14,18,7,7,17,19,23,24,17,26,12,15,21,23,21,19,17,20,22,24;u=0,0,0,1
+3_77_718_F3	solid	read	2913092	2913111	9.4	+	.	g=T3021331333313131231;i=1;p=1.000;q=15,26,7,24,20,18,5,6,17,18,6,11,4,13,19,15,7,4,22,25;u=0,0,0,1
+3_116_154_F3	solid	read	2917672	2917691	9.8	-	.	g=A0323231223233132311;i=1;p=1.000;q=20,9,19,18,10,18,8,16,25,6,18,6,12,24,6,7,5,15,7,17;u=0,0,0,1
+3_239_1415_F3	solid	read	2923256	2923275	19.2	+	.	g=T3233113121300032200;i=1;p=1.000;q=25,27,27,26,27,24,27,27,25,27,22,27,21,26,22,19,26,9,14,21;u=1
+3_142_1468_F3	solid	read	2930117	2930136	10.5	-	.	g=A3233323333303103330;i=1;p=1.000;q=9,20,6,26,16,18,8,13,20,25,25,18,6,12,11,18,4,16,16,6;u=0,0,1
+3_394_295_F3	solid	read	2930118	2930137	8.1	-	.	g=T3023333333333311331;i=1;p=1.000;q=4,14,6,12,7,22,10,4,13,24,18,12,12,4,6,9,9,9,14,4;u=0,0,0,1
+3_222_1773_F3	solid	read	2934040	2934059	11.6	+	.	g=T1303031311123232302;i=1;p=1.000;q=11,10,24,15,28,6,19,5,13,27,8,26,8,22,25,27,26,27,8,13;u=0,0,0,1
+3_276_1344_F3	solid	read	2969950	2969969	13.2	-	.	g=G3211212131233322233;i=1;p=1.000;q=27,27,12,16,11,23,27,8,23,12,27,22,20,12,15,25,8,27,16,6;u=0,1
+3_155_1814_F3	solid	read	3107393	3107412	13.6	+	.	g=A2332222213113120221;i=1;p=1.000;q=27,26,20,25,26,27,12,27,26,18,26,4,27,10,23,26,6,23,26,26;u=0,0,0,1
+3_373_2014_F3	solid	read	3143956	3143975	12.0	-	.	g=T3013322223222221211;i=1;p=1.000;q=16,8,17,21,10,10,18,18,18,13,4,23,16,24,8,19,14,15,23,11;u=0,1
+3_81_1637_F3	solid	read	3413619	3413638	9.1	+	.	g=G2313032322122302111;i=1;p=1.000;q=9,4,7,19,27,6,11,5,12,15,20,27,8,27,6,16,6,27,21,6;u=0,0,1
+3_291_969_F3	solid	read	3438323	3438342	17.4	+	.	g=T0021120212032121313;i=1;p=1.000;q=24,27,6,27,27,27,27,13,27,27,25,27,26,27,27,20,23,26,27,20;u=1
+3_179_1617_F3	solid	read	3475164	3475183	8.0	+	.	g=A2100132222332123123;i=1;p=1.000;q=21,25,11,22,4,19,7,21,20,4,5,24,25,16,4,4,11,19,4,4;u=0,0,0,1
+3_446_861_F3	solid	read	3476173	3476192	11.6	-	.	g=G1213302212022132321;i=1;p=1.000;q=27,27,27,27,26,25,12,27,24,18,24,6,27,26,20,9,6,6,4,23;u=0,0,1
+3_397_317_F3	solid	read	3545152	3545171	11.1	+	.	g=T3110031332233111131;i=1;p=1.000;q=22,27,9,9,26,5,22,20,9,10,16,22,24,6,23,25,22,4,17,18;u=0,0,0,1
+3_323_713_F3	solid	read	3575287	3575306	16.2	-	.	g=A0322222200213223302;i=1;p=1.000;q=27,25,21,27,26,26,24,26,27,18,27,26,26,27,22,22,6,26,25,8;u=0,1
+3_294_1906_F3	solid	read	3727542	3727561	8.4	-	.	g=A3030310223202311021;i=1;p=1.000;q=14,7,5,4,7,18,4,6,13,6,12,12,10,11,15,14,16,7,9,12;u=0,0,0,1
+3_443_223_F3	solid	read	3730805	3730824	17.1	-	.	g=T1113320033330133111;i=1;p=1.000;q=28,27,18,27,27,27,20,26,27,14,25,16,19,19,8,23,16,21,16,15;u=0,0,1
+3_94_809_F3	solid	read	3841898	3841917	21.8	-	.	g=A2032223110001131310;i=1;p=1.000;q=27,27,27,27,26,27,25,24,27,27,27,25,27,27,27,12,23,16,27,27;u=0,0,0,1
+3_245_387_F3	solid	read	3878549	3878568	24.4	-	.	g=A0222211220333132122;i=1;p=1.000;q=27,27,26,27,26,27,27,25,27,25,26,27,18,21,26,25,26,23,24,24;u=1
+3_190_1089_F3	solid	read	3900038	3900057	13.7	-	.	g=T1111110323122301202;i=1;p=1.000;q=27,11,27,11,8,9,27,9,9,26,25,27,11,27,23,14,24,20,22,26;u=0,0,1
+3_442_1501_F3	solid	read	3912610	3912629	8.5	+	.	g=A0012333103302132301;i=1;p=1.000;q=11,11,15,19,15,6,12,10,4,11,21,5,9,16,7,14,4,4,8,19;u=0,0,1
+3_342_678_F3	solid	read	4044575	4044594	4.0	+	.	g=A3333112332213322323;i=1;p=1.000;q=4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4;u=0,0,0,1
+3_56_1294_F3	solid	read	4058789	4058808	12.7	+	.	g=G3323331232322213322;i=1;p=1.000;q=26,17,18,27,23,8,8,24,27,27,9,27,25,14,26,4,27,9,24,23;u=0,0,0,1
+3_69_1575_F3	solid	read	4070467	4070486	9.9	+	.	g=A2222011012222112121;i=1;p=1.000;q=16,25,14,9,9,9,21,9,4,24,6,21,13,6,27,10,19,8,6,27;u=0,0,0,1
+3_198_476_F3	solid	read	4080622	4080641	8.9	+	.	g=C2010231122212011133;i=1;p=1.000;q=16,8,8,16,12,17,4,16,12,15,10,4,9,6,4,25,9,9,23,11;u=0,1
+3_24_715_F3	solid	read	4136503	4136522	4.0	-	.	g=G1313332132232313233;i=1;p=1.000;q=4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4;u=0,0,0,1
+3_151_283_F3	solid	read	4148264	4148283	9.7	+	.	g=T3230210232022111220;i=1;p=1.000;q=9,14,6,25,25,19,6,4,16,11,12,20,10,13,26,19,6,4,19,14;u=0,0,1
+3_164_774_F3	solid	read	4156157	4156176	9.6	+	.	g=G2311112210110223313;i=1;p=1.000;q=8,24,19,7,6,16,12,9,4,8,26,14,26,24,7,18,6,16,14,7;u=0,0,0,1
+3_275_1212_F3	solid	read	4171385	4171404	8.3	+	.	g=G0223122231333302232;i=1;p=1.000;q=13,8,5,4,10,7,12,25,4,25,6,15,6,27,6,11,12,7,14,10;u=0,0,0,1
+3_148_289_F3	solid	read	4177672	4177691	8.0	-	.	g=T1203101332223323323;i=1;p=1.000;q=9,21,11,6,5,7,25,24,26,24,8,9,7,12,7,4,11,9,4,4;u=0,0,0,1
+3_437_1000_F3	solid	read	4179623	4179642	12.3	+	.	g=A0112222212231131001;i=1;p=1.000;q=26,27,26,27,4,27,17,6,22,13,27,24,6,27,21,27,22,15,24,9;u=0,0,1
+3_318_2011_F3	solid	read	4218181	4218200	12.9	-	.	g=T2133330223033303323;i=1;p=1.000;q=25,27,27,5,5,16,27,16,27,15,18,25,26,11,27,19,16,24,9,15;u=0,0,0,1
+3_14_11_F3	solid	read	4222697	4222716	7.8	-	.	g=T2323310222232322122;i=1;p=1.000;q=6,23,16,25,25,9,7,4,12,4,14,6,10,7,6,9,18,4,10,4;u=0,0,0,1
+3_402_391_F3	solid	read	4274545	4274564	6.2	-	.	g=C3303323321111111111;i=1;p=1.000;q=10,19,15,15,7,8,13,4,7,4,5,16,4,4,5,4,9,4,4,4;u=0,0,0,1
+3_293_504_F3	solid	read	4339235	4339254	9.5	+	.	g=C2133223303331120213;i=1;p=1.000;q=6,4,5,26,13,7,17,6,24,10,27,24,5,9,21,9,23,24,20,14;u=0,0,0,1
+3_360_914_F3	solid	read	4407004	4407023	10.7	+	.	g=T3012102130232022001;i=1;p=1.000;q=23,24,19,17,24,6,26,17,25,15,7,24,14,11,26,9,22,4,8,5;u=0,0,0,1
+3_118_1532_F3	solid	read	4431702	4431721	10.2	+	.	g=C3233220201223200322;i=1;p=1.000;q=20,9,17,22,17,23,13,4,9,5,16,11,10,6,17,7,9,22,27,27;u=0,0,1
+3_358_133_F3	solid	read	4460191	4460210	9.1	+	.	g=T0221223112322112233;i=1;p=1.000;q=6,23,12,22,7,6,7,4,13,5,9,23,12,9,24,8,14,7,20,26;u=0,0,0,1
+3_397_195_F3	solid	read	4499390	4499409	6.9	-	.	g=T3302332313332212121;i=1;p=1.000;q=23,14,15,5,9,8,6,4,4,13,4,16,13,16,4,7,4,12,4,5;u=0,0,0,1
+3_158_642_F3	solid	read	4533144	4533163	7.1	-	.	g=A1332103332323233212;i=1;p=1.000;q=8,20,9,22,8,14,4,16,17,4,8,13,7,8,4,12,5,4,4,4;u=0,0,0,1
+3_300_1439_F3	solid	read	4580452	4580471	12.3	-	.	g=A0331111211302100201;i=1;p=1.000;q=5,17,21,14,4,16,11,27,21,9,17,17,27,23,12,21,16,27,25,25;u=0,0,0,1
+# Elapsed time 0.846 secs


=====================================
Tests/GFF/c_elegans_WS199_ann_gff.txt
=====================================
@@ -0,0 +1,2 @@
+# modified GFF file to remove location coordinates and test annotations
+I	Expr_profile	experimental_result_region	.	.	.	+	.	expr_profile=B0019.1


=====================================
Tests/GFF/c_elegans_WS199_dna_shortened.fa
=====================================
@@ -0,0 +1,21 @@
+>I
+gcctaagcctaagcctaagcctaagcctaagcctaagcctaagcctaagcctaagcctaa
+gcctaagcctaagcctaagcctaagcctaagcctaagcct
+>II
+cctaagcctaagcctaagcctaagcctaagcctaagcctaagcctaagcctaagcctaag
+cctaagcctaagcctaagcctaagcctaagcctaagccta
+>III
+cctaagcctaagcctaagcctaagcctaagcctaagcctaagcctaagcctaagcctaag
+cctaagcctaagcctaagcctaagcctaagcctaagccta
+>IV
+cctaagcctaagcctaagcctaagcctaagcctaagcctaagcctaagcctaagcctaag
+cctaagcctaagcctaagcctaagcctaagcctaagccta
+>V
+gaattcctaagcctaagcctaagcctaagcctaagcctaagcctaagcctaagcctaagc
+ctaagcctaagcctaagcctaagcctaagcctaagcctaa
+>X
+ctaagcctaagcctaagcctaagcctaagcctaagcctaagcctaagcctaagcctaagc
+ctaagcctaagcctaagcctaagcctaagcctaagcctaa
+>MtDNA
+cagtaaatagtttaataaaaatatagcatttgggttgctaagatattattactgatagaa
+tttttagtttaatttagaatgtatcacttacaatgatggg


=====================================
Tests/GFF/c_elegans_WS199_shortened_gff.txt
=====================================
@@ -0,0 +1,177 @@
+I	Orfeome	PCR_product	12759747	12764936	.	-	.	amplified=1;pcr_product=mv_B0019.1
+I	SAGE_tag_unambiguously_mapped	SAGE_tag	12763533	12763553	.	-	.	count=1;gene=amx-2;sequence=SAGE:ggcagagtcttttggca;transcript=B0019.1
+I	SAGE_tag_unambiguously_mapped	SAGE_tag	12761492	12761512	.	-	.	count=5;gene=amx-2;sequence=SAGE:aacggagccgtacacgc;transcript=B0019.1
+I	SAGE_tag_most_three_prime	SAGE_tag	12761499	12761512	.	-	.	count=9;gene=amx-2;sequence=SAGE:aacggagccg;transcript=B0019.1
+X	SAGE_tag	SAGE_tag	6819353	6819366	.	+	.	count=9;gene=amx-2;sequence=SAGE:aacggagccg;transcript=B0019.1
+I	Expr_profile	experimental_result_region	12762449	12764118	.	+	.	expr_profile=B0019.1
+I	Coding_transcript	CDS	12759745	12759828	.	-	0	ID=CDS:B0019.1;Parent=Transcript:B0019.1;locus=amx-2;status=Partially_confirmed;wormpep=CE:CE40797
+I	Coding_transcript	CDS	12759949	12760013	.	-	2	ID=CDS:B0019.1;Parent=Transcript:B0019.1;locus=amx-2;status=Partially_confirmed;wormpep=CE:CE40797
+I	Coding_transcript	CDS	12760227	12760319	.	-	2	ID=CDS:B0019.1;Parent=Transcript:B0019.1;locus=amx-2;status=Partially_confirmed;wormpep=CE:CE40797
+I	Coding_transcript	CDS	12760365	12760494	.	-	0	ID=CDS:B0019.1;Parent=Transcript:B0019.1;locus=amx-2;status=Partially_confirmed;wormpep=CE:CE40797
+I	Coding_transcript	CDS	12760834	12760904	.	-	2	ID=CDS:B0019.1;Parent=Transcript:B0019.1;locus=amx-2;status=Partially_confirmed;wormpep=CE:CE40797
+I	Coding_transcript	CDS	12761172	12761516	.	-	2	ID=CDS:B0019.1;Parent=Transcript:B0019.1;locus=amx-2;status=Partially_confirmed;wormpep=CE:CE40797
+I	Coding_transcript	CDS	12761799	12761953	.	-	1	ID=CDS:B0019.1;Parent=Transcript:B0019.1;locus=amx-2;status=Partially_confirmed;wormpep=CE:CE40797
+I	Coding_transcript	CDS	12762127	12762268	.	-	2	ID=CDS:B0019.1;Parent=Transcript:B0019.1;locus=amx-2;status=Partially_confirmed;wormpep=CE:CE40797
+I	Coding_transcript	CDS	12762648	12762806	.	-	2	ID=CDS:B0019.1;Parent=Transcript:B0019.1;locus=amx-2;status=Partially_confirmed;wormpep=CE:CE40797
+I	Coding_transcript	CDS	12763112	12763249	.	-	2	ID=CDS:B0019.1;Parent=Transcript:B0019.1;locus=amx-2;status=Partially_confirmed;wormpep=CE:CE40797
+I	Coding_transcript	CDS	12763448	12763655	.	-	0	ID=CDS:B0019.1;Parent=Transcript:B0019.1;locus=amx-2;status=Partially_confirmed;wormpep=CE:CE40797
+I	Coding_transcript	CDS	12763729	12763882	.	-	1	ID=CDS:B0019.1;Parent=Transcript:B0019.1;locus=amx-2;status=Partially_confirmed;wormpep=CE:CE40797
+I	Coding_transcript	CDS	12763979	12764102	.	-	2	ID=CDS:B0019.1;Parent=Transcript:B0019.1;locus=amx-2;status=Partially_confirmed;wormpep=CE:CE40797
+I	Coding_transcript	CDS	12764291	12764471	.	-	0	ID=CDS:B0019.1;Parent=Transcript:B0019.1;locus=amx-2;status=Partially_confirmed;wormpep=CE:CE40797
+I	Coding_transcript	CDS	12764812	12764937	.	-	0	ID=CDS:B0019.1;Parent=Transcript:B0019.1;locus=amx-2;status=Partially_confirmed;wormpep=CE:CE40797
+I	history	CDS	12759745	12759828	.	-	0	ID=CDS:B0019.1:wp173
+I	history	CDS	12759949	12760013	.	-	2	ID=CDS:B0019.1:wp173
+I	history	CDS	12760227	12760319	.	-	2	ID=CDS:B0019.1:wp173
+I	history	CDS	12760365	12760494	.	-	0	ID=CDS:B0019.1:wp173
+I	history	CDS	12760834	12760904	.	-	2	ID=CDS:B0019.1:wp173
+I	history	CDS	12761172	12761516	.	-	2	ID=CDS:B0019.1:wp173
+I	history	CDS	12761577	12761626	.	-	1	ID=CDS:B0019.1:wp173
+I	history	CDS	12761795	12761953	.	-	1	ID=CDS:B0019.1:wp173
+I	history	CDS	12762127	12762268	.	-	2	ID=CDS:B0019.1:wp173
+I	history	CDS	12762648	12762806	.	-	2	ID=CDS:B0019.1:wp173
+I	history	CDS	12763112	12763249	.	-	2	ID=CDS:B0019.1:wp173
+I	history	CDS	12763448	12763655	.	-	0	ID=CDS:B0019.1:wp173
+I	history	CDS	12763729	12763882	.	-	1	ID=CDS:B0019.1:wp173
+I	history	CDS	12763979	12764102	.	-	2	ID=CDS:B0019.1:wp173
+I	history	CDS	12764291	12764471	.	-	0	ID=CDS:B0019.1:wp173
+I	history	CDS	12764812	12764937	.	-	0	ID=CDS:B0019.1:wp173
+I	history	CDS	12759745	12759828	.	-	0	ID=CDS:B0019.1:wp90
+I	history	CDS	12759949	12760013	.	-	2	ID=CDS:B0019.1:wp90
+I	history	CDS	12760227	12760319	.	-	2	ID=CDS:B0019.1:wp90
+I	history	CDS	12761172	12761516	.	-	2	ID=CDS:B0019.1:wp90
+I	history	CDS	12761577	12761626	.	-	1	ID=CDS:B0019.1:wp90
+I	history	CDS	12761795	12761953	.	-	1	ID=CDS:B0019.1:wp90
+I	history	CDS	12762127	12762268	.	-	2	ID=CDS:B0019.1:wp90
+I	history	CDS	12762648	12762806	.	-	2	ID=CDS:B0019.1:wp90
+I	history	CDS	12763112	12763249	.	-	2	ID=CDS:B0019.1:wp90
+I	history	CDS	12763469	12763655	.	-	0	ID=CDS:B0019.1:wp90
+I	history	CDS	12763729	12763882	.	-	1	ID=CDS:B0019.1:wp90
+I	history	CDS	12763979	12764102	.	-	2	ID=CDS:B0019.1:wp90
+I	history	CDS	12764291	12764471	.	-	0	ID=CDS:B0019.1:wp90
+I	history	CDS	12764812	12764937	.	-	0	ID=CDS:B0019.1:wp90
+I	mass_spec_genome	translated_nucleotide_match	12761920	12761953	.	-	.	ID=Target:381130;Target=Mass_spec_peptide:MSP:FADFSPLDVSDVNFATDDLAK 10 21 +;Note=MSP:FADFSPLDVSDVNFATDDLAK;cds_matches=B0019.1;protein_matches=WP:CE40797;times_observed=3
+I	mass_spec_genome	translated_nucleotide_match	12762127	12762155	.	-	.	ID=Target:381130;Target=Mass_spec_peptide:MSP:FADFSPLDVSDVNFATDDLAK 1 10 +;Note=MSP:FADFSPLDVSDVNFATDDLAK;cds_matches=B0019.1;protein_matches=WP:CE40797;times_observed=3
+I	mass_spec_genome	translated_nucleotide_match	12763506	12763559	.	-	.	ID=Target:381133;Target=Mass_spec_peptide:MSP:FGHGQSLLAQGGMNEVVR 1 18 +;Note=MSP:FGHGQSLLAQGGMNEVVR;cds_matches=B0019.1;protein_matches=WP:CE40797;times_observed=1
+I	mass_spec_genome	translated_nucleotide_match	12764361	12764411	.	-	.	ID=Target:381144;Target=Mass_spec_peptide:MSP:NIQQNRPGLSVLVLEAR 1 17 +;Note=MSP:NIQQNRPGLSVLVLEAR;cds_matches=B0019.1;protein_matches=WP:CE40797;times_observed=2
+I	Coding_transcript	mRNA	12759582	12764949	.	-	.	ID=Transcript:B0019.1;Note=amx-2;Parent=Gene:WBGene00000138;cds=B0019.1;prediction_status=Partially_confirmed;wormpep=CE:CE40797
+I	Allele	SNP	12764272	12764272	.	+	.	interpolated_map_position=14.003;rflp=No;variation=snp_B0019[1]
+I	Oligo_set	reagent	12759745	12761589	.	-	.	oligo_set=Aff_B0019.1
+I	Coding_transcript	exon	12759745	12759828	.	-	0	Parent=Transcript:B0019.1
+I	Coding_transcript	exon	12759949	12760013	.	-	2	Parent=Transcript:B0019.1
+I	Coding_transcript	exon	12760227	12760319	.	-	2	Parent=Transcript:B0019.1
+I	Coding_transcript	exon	12760365	12760494	.	-	0	Parent=Transcript:B0019.1
+I	Coding_transcript	exon	12760834	12760904	.	-	2	Parent=Transcript:B0019.1
+I	Coding_transcript	exon	12761172	12761516	.	-	2	Parent=Transcript:B0019.1
+I	Coding_transcript	exon	12761799	12761953	.	-	1	Parent=Transcript:B0019.1
+I	Coding_transcript	exon	12762127	12762268	.	-	2	Parent=Transcript:B0019.1
+I	Coding_transcript	exon	12762648	12762806	.	-	2	Parent=Transcript:B0019.1
+I	Coding_transcript	exon	12763112	12763249	.	-	2	Parent=Transcript:B0019.1
+I	Coding_transcript	exon	12763448	12763655	.	-	0	Parent=Transcript:B0019.1
+I	Coding_transcript	exon	12763729	12763882	.	-	1	Parent=Transcript:B0019.1
+I	Coding_transcript	exon	12763979	12764102	.	-	2	Parent=Transcript:B0019.1
+I	Coding_transcript	exon	12764291	12764471	.	-	0	Parent=Transcript:B0019.1
+I	Coding_transcript	exon	12764812	12764937	.	-	0	Parent=Transcript:B0019.1
+I	Coding_transcript	five_prime_UTR	12764938	12764949	.	-	.	Parent=Transcript:B0019.1
+I	Coding_transcript	three_prime_UTR	12759582	12759744	.	-	.	Parent=Transcript:B0019.1
+I	Coding_transcript	intron	12760495	12760833	.	-	.	Parent=Transcript:B0019.1;confirmed_est=EC027594
+I	Coding_transcript	intron	12760905	12761171	.	-	.	Parent=Transcript:B0019.1;confirmed_est=EC027594
+I	Coding_transcript	intron	12761517	12761798	.	-	.	Parent=Transcript:B0019.1;confirmed_est=EC027594
+I	Coding_transcript	intron	12759829	12759948	.	-	.	Parent=Transcript:B0019.1;confirmed_est=EC034652
+I	Coding_transcript	intron	12760014	12760226	.	-	.	Parent=Transcript:B0019.1;confirmed_est=EC034652
+I	Coding_transcript	intron	12760320	12760364	.	-	.	Parent=Transcript:B0019.1;confirmed_est=yk1054h04.3
+I	Coding_transcript	intron	12763883	12763978	.	-	.	Parent=Transcript:B0019.1;confirmed_est=yk1054h04.5,OSTF088D9_1
+I	Coding_transcript	intron	12764103	12764290	.	-	.	Parent=Transcript:B0019.1;confirmed_est=yk1054h04.5,OSTF088D9_1
+I	Coding_transcript	intron	12764472	12764811	.	-	.	Parent=Transcript:B0019.1;confirmed_est=yk1054h04.5,OSTF088D9_1
+I	Coding_transcript	intron	12762807	12763111	.	-	.	Parent=Transcript:B0019.1;confirmed_est=yk1056c07.5
+I	Coding_transcript	intron	12763250	12763447	.	-	.	Parent=Transcript:B0019.1;confirmed_est=yk1056c07.5
+I	Coding_transcript	intron	12763656	12763728	.	-	.	Parent=Transcript:B0019.1;confirmed_est=yk1056c07.5
+I	Coding_transcript	intron	12761954	12762126	.	-	.	Parent=Transcript:B0019.1;confirmed_est=yk262g9.5
+I	Coding_transcript	intron	12762269	12762647	.	-	.	Parent=Transcript:B0019.1;confirmed_est=yk262g9.5
+I	Promoterome	PCR_product	12764938	12766937	.	+	.	pcr_product=p_B0019.1_93
+I	GenePair_STS	PCR_product	12762449	12764118	.	+	.	pcr_product=sjj_B0019.1
+I	Coding_transcript	gene	12759582	12764949	.	-	.	ID=Gene:WBGene00000138
+III	Orfeome	PCR_product	13780230	13780850	.	+	.	amplified=1;pcr_product=mv_3R5.1.v6
+IV	Orfeome	PCR_product	17486939	17488952	.	-	.	amplified=1;pcr_product=mv_4R79.1
+IV	Orfeome	PCR_product	17480353	17483284	.	-	.	amplified=1;pcr_product=mv_4R79.2
+X	Orfeome	PCR_product	17714881	17718531	.	+	.	amplified=1;pcr_product=mv_6R55.1
+X	Orfeome	PCR_product	17712787	17714742	.	+	.	amplified=1;pcr_product=mv_6R55.2
+II	Orfeome	PCR_product	6995874	7010146	.	+	.	amplified=1;pcr_product=mv_AAA03517
+III	Orfeome	PCR_product	5625097	5631795	.	+	.	amplified=1;pcr_product=mv_AAA03544
+X	GenePair_STS	PCR_product	9962853	9963737	.	+	.	pcr_product=cenix:102-c3
+II	GenePair_STS	PCR_product	5507236	5508135	.	+	.	pcr_product=cenix:102-c4
+V	GenePair_STS	PCR_product	10117842	10118735	.	+	.	pcr_product=cenix:102-c5
+IV	GenePair_STS	PCR_product	3566130	3567025	.	+	.	pcr_product=cenix:102-c6
+X	GenePair_STS	PCR_product	6117180	6117930	.	+	.	pcr_product=cenix:102-c7
+IV	GenePair_STS	PCR_product	7189492	7190369	.	+	.	pcr_product=cenix:102-c9
+II	GenePair_STS	PCR_product	14462527	14463202	.	+	.	pcr_product=cenix:102-d1
+X	Promoterome	PCR_product	2258069	2259336	.	+	.	pcr_product=p_AH9.2_93
+IV	Promoterome	PCR_product	12157449	12159448	.	+	.	pcr_product=p_B0001.6_93
+I	Promoterome	PCR_product	12764938	12766937	.	+	.	pcr_product=p_B0019.1_93
+V	Promoterome	PCR_product	10320122	10320689	.	+	.	pcr_product=p_B0024.12_93
+I	Coding_transcript	CDS	4581214	4581237	.	-	0	ID=CDS:D1007.5b;Parent=Transcript:D1007.5b.2,Transcript:D1007.5b.1;status=Confirmed;wormpep=WP:CE33577
+I	Coding_transcript	CDS	4581664	4582026	.	-	0	ID=CDS:D1007.5b;Parent=Transcript:D1007.5b.2,Transcript:D1007.5b.1;status=Confirmed;wormpep=WP:CE33577
+I	Coding_transcript	CDS	4582412	4582718	.	-	1	ID=CDS:D1007.5b;Parent=Transcript:D1007.5b.2,Transcript:D1007.5b.1;status=Confirmed;wormpep=WP:CE33577
+I	Coding_transcript	CDS	4583190	4583374	.	-	0	ID=CDS:D1007.5b;Parent=Transcript:D1007.5b.2,Transcript:D1007.5b.1;status=Confirmed;wormpep=WP:CE33577
+I	Coding_transcript	CDS	4583426	4583509	.	-	0	ID=CDS:D1007.5b;Parent=Transcript:D1007.5b.2,Transcript:D1007.5b.1;status=Confirmed;wormpep=WP:CE33577
+I	Coding_transcript	CDS	4583560	4583805	.	-	0	ID=CDS:D1007.5b;Parent=Transcript:D1007.5b.2,Transcript:D1007.5b.1;status=Confirmed;wormpep=WP:CE33577
+I	Coding_transcript	mRNA	4580734	4583815	.	-	.	ID=Transcript:D1007.5b.1;Parent=Gene:WBGene00017003;cds=D1007.5b;prediction_status=Confirmed;wormpep=WP:CE33577
+I	Coding_transcript	mRNA	4581214	4583811	.	-	.	ID=Transcript:D1007.5b.2;Parent=Gene:WBGene00017003;cds=D1007.5b;prediction_status=Confirmed;wormpep=WP:CE33577
+I	Coding_transcript	exon	4581214	4581237	.	-	0	Parent=Transcript:D1007.5b.1
+I	Coding_transcript	exon	4581664	4582026	.	-	0	Parent=Transcript:D1007.5b.1
+I	Coding_transcript	exon	4582412	4582718	.	-	1	Parent=Transcript:D1007.5b.1
+I	Coding_transcript	exon	4583190	4583374	.	-	0	Parent=Transcript:D1007.5b.1
+I	Coding_transcript	exon	4583426	4583509	.	-	0	Parent=Transcript:D1007.5b.1
+I	Coding_transcript	exon	4583560	4583805	.	-	0	Parent=Transcript:D1007.5b.1
+I	Coding_transcript	five_prime_UTR	4583806	4583815	.	-	.	Parent=Transcript:D1007.5b.1
+I	Coding_transcript	three_prime_UTR	4580734	4581213	.	-	.	Parent=Transcript:D1007.5b.1
+I	Coding_transcript	intron	4582027	4582411	.	-	.	Parent=Transcript:D1007.5b.1;confirmed_est=EB994038
+I	Coding_transcript	intron	4583375	4583425	.	-	.	Parent=Transcript:D1007.5b.1;confirmed_est=EC038345,OSTF085G5_1
+I	Coding_transcript	intron	4583510	4583559	.	-	.	Parent=Transcript:D1007.5b.1;confirmed_est=EC038345,OSTF085G5_1
+I	Coding_transcript	intron	4582719	4583189	.	-	.	Parent=Transcript:D1007.5b.1;confirmed_est=yk1055g06.5,OSTF085G5_1
+I	Coding_transcript	intron	4581238	4581663	.	-	.	Parent=Transcript:D1007.5b.1;confirmed_est=yk1057e08.3
+I	Coding_transcript	exon	4581214	4581237	.	-	0	Parent=Transcript:D1007.5b.2
+I	Coding_transcript	exon	4581664	4582026	.	-	0	Parent=Transcript:D1007.5b.2
+I	Coding_transcript	exon	4582412	4582718	.	-	1	Parent=Transcript:D1007.5b.2
+I	Coding_transcript	exon	4583190	4583374	.	-	0	Parent=Transcript:D1007.5b.2
+I	Coding_transcript	exon	4583426	4583509	.	-	0	Parent=Transcript:D1007.5b.2
+I	Coding_transcript	exon	4583560	4583805	.	-	0	Parent=Transcript:D1007.5b.2
+I	Coding_transcript	five_prime_UTR	4583806	4583811	.	-	.	Parent=Transcript:D1007.5b.2
+I	Coding_transcript	intron	4582027	4582411	.	-	.	Parent=Transcript:D1007.5b.2;confirmed_est=EB994038
+I	Coding_transcript	intron	4583375	4583425	.	-	.	Parent=Transcript:D1007.5b.2;confirmed_est=EC038345,OSTF085G5_1
+I	Coding_transcript	intron	4583510	4583559	.	-	.	Parent=Transcript:D1007.5b.2;confirmed_est=EC038345,OSTF085G5_1
+I	Coding_transcript	intron	4582719	4583189	.	-	.	Parent=Transcript:D1007.5b.2;confirmed_est=yk1055g06.5,OSTF085G5_1
+I	Coding_transcript	intron	4581238	4581663	.	-	.	Parent=Transcript:D1007.5b.2;confirmed_est=yk1057e08.3
+I	Coding_transcript	gene	4580693	4583815	.	-	.	ID=Gene:WBGene00017003
+I	SAGE_tag_unambiguously_mapped	SAGE_tag	4581093	4581113	.	-	.	count=10;gene=D1007.5;sequence=SAGE:tttgcgaattacttgct;transcript=D1007.5b.1,D1007.5a
+I	SAGE_tag_unambiguously_mapped	SAGE_tag	4580748	4580768	.	-	.	count=112;gene=D1007.5;sequence=SAGE:ttttccattaattttga;transcript=D1007.5b.1,D1007.5a
+I	SAGE_tag_unambiguously_mapped	SAGE_tag	4582415	4582428	.	-	.	count=1;gene=D1007.5;sequence=SAGE:cattttcgtg;transcript=D1007.5b.2,D1007.5b.1,D1007.5a
+I	SAGE_tag_unambiguously_mapped	SAGE_tag	4580914	4580927	.	-	.	count=1;gene=D1007.5;sequence=SAGE:taaatttcaa;transcript=D1007.5b.1,D1007.5a
+I	SAGE_tag_unambiguously_mapped	SAGE_tag	4581193	4581206	.	-	.	count=1;gene=D1007.5;sequence=SAGE:tgctcgttcg;transcript=D1007.5b.1,D1007.5a
+I	SAGE_tag_unambiguously_mapped	SAGE_tag	4583465	4583478	.	-	.	count=1;gene=D1007.5;sequence=SAGE:tgttggcctt;transcript=D1007.5b.2,D1007.5b.1,D1007.5a
+I	SAGE_tag_unambiguously_mapped	SAGE_tag	4583458	4583478	.	-	.	count=1;gene=D1007.5;sequence=SAGE:tgttggccttttacttg;transcript=D1007.5b.2,D1007.5b.1,D1007.5a
+I	SAGE_tag_unambiguously_mapped	SAGE_tag	4582533	4582553	.	-	.	count=2;gene=D1007.5;sequence=SAGE:tgcagtgatagtccagc;transcript=D1007.5b.2,D1007.5b.1,D1007.5a
+I	SAGE_tag_unambiguously_mapped	SAGE_tag	4581100	4581113	.	-	.	count=2;gene=D1007.5;sequence=SAGE:tttgcgaatt;transcript=D1007.5b.1,D1007.5a
+I	SAGE_tag_unambiguously_mapped	SAGE_tag	4580755	4580768	.	-	.	count=43;gene=D1007.5;sequence=SAGE:ttttccatta;transcript=D1007.5b.1,D1007.5a
+I	Coding_transcript	CDS	4580993	4581241	.	-	0	ID=CDS:D1007.5a;Parent=Transcript:D1007.5a;status=Confirmed;wormpep=CE:CE29034
+I	Coding_transcript	CDS	4581664	4582026	.	-	0	ID=CDS:D1007.5a;Parent=Transcript:D1007.5a;status=Confirmed;wormpep=CE:CE29034
+I	Coding_transcript	CDS	4582412	4582718	.	-	1	ID=CDS:D1007.5a;Parent=Transcript:D1007.5a;status=Confirmed;wormpep=CE:CE29034
+I	Coding_transcript	CDS	4583190	4583374	.	-	0	ID=CDS:D1007.5a;Parent=Transcript:D1007.5a;status=Confirmed;wormpep=CE:CE29034
+I	Coding_transcript	CDS	4583426	4583509	.	-	0	ID=CDS:D1007.5a;Parent=Transcript:D1007.5a;status=Confirmed;wormpep=CE:CE29034
+I	Coding_transcript	CDS	4583560	4583805	.	-	0	ID=CDS:D1007.5a;Parent=Transcript:D1007.5a;status=Confirmed;wormpep=CE:CE29034
+I	mass_spec_genome	translated_nucleotide_match	4580996	4581052	.	-	.	ID=Target:277116;Target=Mass_spec_peptide:MSP:IYEPSQEDLLLMHQLQQER 1 19 +;Note=MSP:IYEPSQEDLLLMHQLQQER;cds_matches=D1007.5a;protein_matches=WP:CE29034;times_observed=1
+I	mass_spec_genome	translated_nucleotide_match	4581838	4581882	.	-	.	ID=Target:277138;Target=Mass_spec_peptide:MSP:AAIHLGSWHQIEGPR 1 15 +;Note=MSP:AAIHLGSWHQIEGPR;cds_matches=D1007.5b D1007.5a;protein_matches=WP:CE33577 WP:CE29034;times_observed=1
+I	mass_spec_genome	translated_nucleotide_match	4583581	4583601	.	-	.	ID=Target:277176;Target=Mass_spec_peptide:MSP:TLWWLPK 1 7 +;Note=MSP:TLWWLPK;cds_matches=D1007.5b D1007.5a;protein_matches=WP:CE33577 WP:CE29034;times_observed=1
+I	Coding_transcript	mRNA	4580693	4583811	.	-	.	ID=Transcript:D1007.5a;Parent=Gene:WBGene00017003;cds=D1007.5a;prediction_status=Confirmed;wormpep=CE:CE29034
+I	Coding_transcript	exon	4580993	4581241	.	-	0	Parent=Transcript:D1007.5a
+I	Coding_transcript	exon	4581664	4582026	.	-	0	Parent=Transcript:D1007.5a
+I	Coding_transcript	exon	4582412	4582718	.	-	1	Parent=Transcript:D1007.5a
+I	Coding_transcript	exon	4583190	4583374	.	-	0	Parent=Transcript:D1007.5a
+I	Coding_transcript	exon	4583426	4583509	.	-	0	Parent=Transcript:D1007.5a
+I	Coding_transcript	exon	4583560	4583805	.	-	0	Parent=Transcript:D1007.5a
+I	Coding_transcript	five_prime_UTR	4583806	4583811	.	-	.	Parent=Transcript:D1007.5a
+I	Coding_transcript	three_prime_UTR	4580693	4580992	.	-	.	Parent=Transcript:D1007.5a
+I	Coding_transcript	intron	4582027	4582411	.	-	.	Parent=Transcript:D1007.5a;confirmed_est=EB994038
+I	Coding_transcript	intron	4581242	4581663	.	-	.	Parent=Transcript:D1007.5a;confirmed_est=EB994038,OSTR085G5_1
+I	Coding_transcript	intron	4583375	4583425	.	-	.	Parent=Transcript:D1007.5a;confirmed_est=EC038345,OSTF085G5_1
+I	Coding_transcript	intron	4583510	4583559	.	-	.	Parent=Transcript:D1007.5a;confirmed_est=EC038345,OSTF085G5_1
+I	Coding_transcript	intron	4582719	4583189	.	-	.	Parent=Transcript:D1007.5a;confirmed_est=yk1055g06.5,OSTF085G5_1


=====================================
Tests/GFF/ensembl_gtf.txt
=====================================
@@ -0,0 +1,33 @@
+I	snoRNA	exon	3747	3909	.	-	.	 gene_id "Y74C9A.6"; transcript_id "Y74C9A.6"; exon_number "1"; gene_name "Y74C9A.6"; transcript_name "NR_001477.2";
+I	protein_coding	exon	12764812	12764949	.	-	.	 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "1"; gene_name "amx-2"; transcript_name "B0019.1";
+I	protein_coding	CDS	12764812	12764937	.	-	0	 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "1"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1";
+I	protein_coding	start_codon	12764935	12764937	.	-	0	 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "1"; gene_name "amx-2"; transcript_name "B0019.1";
+I	protein_coding	exon	12764291	12764471	.	-	.	 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "2"; gene_name "amx-2"; transcript_name "B0019.1";
+I	protein_coding	CDS	12764291	12764471	.	-	0	 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "2"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1";
+I	protein_coding	exon	12763979	12764102	.	-	.	 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "3"; gene_name "amx-2"; transcript_name "B0019.1";
+I	protein_coding	CDS	12763979	12764102	.	-	2	 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "3"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1";
+I	protein_coding	exon	12763729	12763882	.	-	.	 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "4"; gene_name "amx-2"; transcript_name "B0019.1";
+I	protein_coding	CDS	12763729	12763882	.	-	1	 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "4"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1";
+I	protein_coding	exon	12763448	12763655	.	-	.	 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "5"; gene_name "amx-2"; transcript_name "B0019.1";
+I	protein_coding	CDS	12763448	12763655	.	-	0	 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "5"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1";
+I	protein_coding	exon	12763112	12763249	.	-	.	 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "6"; gene_name "amx-2"; transcript_name "B0019.1";
+I	protein_coding	CDS	12763112	12763249	.	-	2	 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "6"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1";
+I	protein_coding	exon	12762648	12762806	.	-	.	 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "7"; gene_name "amx-2"; transcript_name "B0019.1";
+I	protein_coding	CDS	12762648	12762806	.	-	2	 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "7"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1";
+I	protein_coding	exon	12762127	12762268	.	-	.	 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "8"; gene_name "amx-2"; transcript_name "B0019.1";
+I	protein_coding	CDS	12762127	12762268	.	-	2	 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "8"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1";
+I	protein_coding	exon	12761799	12761953	.	-	.	 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "9"; gene_name "amx-2"; transcript_name "B0019.1";
+I	protein_coding	CDS	12761799	12761953	.	-	1	 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "9"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1";
+I	protein_coding	exon	12761172	12761516	.	-	.	 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "10"; gene_name "amx-2"; transcript_name "B0019.1";
+I	protein_coding	CDS	12761172	12761516	.	-	2	 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "10"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1";
+I	protein_coding	exon	12760834	12760904	.	-	.	 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "11"; gene_name "amx-2"; transcript_name "B0019.1";
+I	protein_coding	CDS	12760834	12760904	.	-	2	 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "11"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1";
+I	protein_coding	exon	12760365	12760494	.	-	.	 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "12"; gene_name "amx-2"; transcript_name "B0019.1";
+I	protein_coding	CDS	12760365	12760494	.	-	0	 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "12"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1";
+I	protein_coding	exon	12760227	12760319	.	-	.	 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "13"; gene_name "amx-2"; transcript_name "B0019.1";
+I	protein_coding	CDS	12760227	12760319	.	-	2	 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "13"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1";
+I	protein_coding	exon	12759949	12760013	.	-	.	 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "14"; gene_name "amx-2"; transcript_name "B0019.1";
+I	protein_coding	CDS	12759949	12760013	.	-	2	 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "14"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1";
+I	protein_coding	exon	12759579	12759828	.	-	.	 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "15"; gene_name "amx-2"; transcript_name "B0019.1";
+I	protein_coding	CDS	12759748	12759828	.	-	0	 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "15"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1";
+I	protein_coding	stop_codon	12759745	12759747	.	-	0	 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "15"; gene_name "amx-2"; transcript_name "B0019.1";


=====================================
Tests/GFF/glimmer_nokeyval.gff3
=====================================
@@ -0,0 +1,6 @@
+##gff-version 3
+##sequence-region scaffold4215_3 1 6526
+scaffold4215_3	glimmer	gene	3	62	.	-	.	ID=GL0000006;Name=GL0000006;Lack 3'-end;
+scaffold4215_3	glimmer	mRNA	3	62	.	-	.	ID=GL0000006;Name=GL0000006;Parent=GL0000006;Lack 3'-end;
+scaffold4215_3	glimmer	CDS	3	62	2.84	-	0	Parent=GL0000006;Lack 3'-end;
+scaffold4215_3	glimmer	gene	124	1983	.	-	.	ID=GL0000007;Name=GL0000007;Complete;


=====================================
Tests/GFF/hybrid1.gff3
=====================================
@@ -0,0 +1,17 @@
+##gff-version 3
+##sequence-region foo 1 100
+##feature-ontology bar
+##attribute-ontology baz
+##source-ontology boo
+##sequence-region chr17 62467934 62469545 
+chr17	UCSC	mRNA	62467934	62469545	.	-	.	ID=A00469;Dbxref=AFFX-U133:205840_x_at,Locuslink:2688,Genbank-mRNA:A00469,Swissprot:P01241,PFAM:PF00103,AFFX-U95:1332_f_at,Swissprot:SOMA_HUMAN;Note=growth%20hormone%201;Alias=GH1
+chr17	UCSC	CDS	62468039	62468236	.	-	1	Parent=A00469
+chr17	UCSC	CDS	62468490	62468654	.	-	2	Parent=A00469
+chr17	UCSC	CDS	62468747	62468866	.	-	1	Parent=A00469
+chr17	UCSC	CDS	62469076	62469236	.	-	1	Parent=A00469
+chr17	UCSC	CDS	62469497	62469506	.	-	0	Parent=A00469
+###
+##FASTA
+>chr17
+GATTACA
+GATTACA


=====================================
Tests/GFF/jgi_gff2.txt
=====================================
@@ -0,0 +1,6 @@
+chr_1	JGI	exon	37061	37174	.	-	.	name "fgenesh1_pg.C_chr_1000007"; transcriptId 873
+chr_1	JGI	CDS	37061	37174	.	-	0	name "fgenesh1_pg.C_chr_1000007"; proteinId 873; exonNumber 3
+chr_1	JGI	exon	37315	37620	.	-	.	name "fgenesh1_pg.C_chr_1000007"; transcriptId 873
+chr_1	JGI	CDS	37315	37620	.	-	0	name "fgenesh1_pg.C_chr_1000007"; proteinId 873; exonNumber 2
+chr_1	JGI	exon	37752	38216	.	-	.	name "fgenesh1_pg.C_chr_1000007"; transcriptId 873
+chr_1	JGI	CDS	37752	38216	.	-	0	name "fgenesh1_pg.C_chr_1000007"; proteinId 873; exonNumber 1


=====================================
Tests/GFF/mouse_extra_comma.gff3
=====================================
@@ -0,0 +1,17 @@
+chr17	RefSeq	gene	6797760	6818159	.	+	.	ID=NC_000083.5:LOC100040603;Name=NC_000083.5:LOC100040603
+chr17	RefSeq	mRNA	6797760	6818159	.	+	.	ID=XM_001475631.1;Parent=NC_000083.5:LOC100040603
+chr17	RefSeq	protein	6806527	6812289	.	+	.	ID=;Parent=XM_001475631.1
+chr17	RefSeq	five_prime_UTR	6797760	6797769	.	+	.	Parent=XM_001475631.1
+chr17	RefSeq	five_prime_UTR	6806513	6806526	.	+	.	Parent=XM_001475631.1
+chr17	RefSeq	CDS	6806527	6806553	.	+	0	Name=CDS:NC_000083.5:LOC100040603;Parent=XM_001475631.1,
+chr17	RefSeq	CDS	6808204	6808245	.	+	0	Name=CDS:NC_000083.5:LOC100040603;Parent=XM_001475631.1,
+chr17	RefSeq	CDS	6811330	6811453	.	+	0	Name=CDS:NC_000083.5:LOC100040603;Parent=XM_001475631.1,
+chr17	RefSeq	CDS	6811792	6811869	.	+	2	Name=CDS:NC_000083.5:LOC100040603;Parent=XM_001475631.1,
+chr17	RefSeq	CDS	6812219	6812289	.	+	2	Name=CDS:NC_000083.5:LOC100040603;Parent=XM_001475631.1,
+chr17	RefSeq	three_prime_UTR	6812290	6818159	.	+	.	Parent=XM_001475631.1
+chr17	RefSeq	exon	6797760	6797769	.	+	.	Parent=XM_001475631.1
+chr17	RefSeq	exon	6806513	6806553	.	+	.	Parent=XM_001475631.1
+chr17	RefSeq	exon	6808204	6808245	.	+	.	Parent=XM_001475631.1
+chr17	RefSeq	exon	6811330	6811453	.	+	.	Parent=XM_001475631.1
+chr17	RefSeq	exon	6811792	6811869	.	+	.	Parent=XM_001475631.1
+chr17	RefSeq	exon	6812219	6818159	.	+	.	Parent=XM_001475631.1


=====================================
Tests/GFF/ncbi_gff3.txt
=====================================
@@ -0,0 +1,21 @@
+##gff-version 3
+##source-version NCBI C++ formatter 0.2
+##date 2009-04-25
+##Type DNA NC_008596.1
+NC_008596.1	RefSeq	gene	12272	13301	.	+	.	locus_tag=MSMEG_0013;note=ferric%20enterobactin%20transport%20system%20permease%20protein%20FepG%3B%20this%20gene%20contains%20a%20frame%20shift%20which%20is%20not%20the%20result%20of%20sequencing%20error%3B%20identified%20by%20match%20to%20protein%20family%20HMM%20PF01032;pseudo=;db_xref=GeneID:4537201
+NC_008596.1	RefSeq	gene	1137579	1138550	.	+	.	ID=NC_008596.1:speB;locus_tag=MSMEG_1072;db_xref=GeneID:4535378
+NC_008596.1	RefSeq	CDS	1137579	1138547	.	+	0	ID=NC_008596.1:speB:unknown_transcript_1;Parent=NC_008596.1:speB;locus_tag=MSMEG_1072;EC_number=3.5.3.11;note=identified%20by%20match%20to%20protein%20family%20HMM%20PF00491%3B%20match%20to%20protein%20family%20HMM%20TIGR01230;transl_table=11;product=agmatinase;protein_id=YP_885468.1;db_xref=GI:118469242;db_xref=GeneID:4535378;exon_number=1
+NC_008596.1	RefSeq	start_codon	1137579	1137581	.	+	0	ID=NC_008596.1:speB:unknown_transcript_1;Parent=NC_008596.1:speB;locus_tag=MSMEG_1072;EC_number=3.5.3.11;note=identified%20by%20match%20to%20protein%20family%20HMM%20PF00491%3B%20match%20to%20protein%20family%20HMM%20TIGR01230;transl_table=11;product=agmatinase;protein_id=YP_885468.1;db_xref=GI:118469242;db_xref=GeneID:4535378;exon_number=1
+NC_008596.1	RefSeq	stop_codon	1138548	1138550	.	+	0	ID=NC_008596.1:speB:unknown_transcript_1;Parent=NC_008596.1:speB;locus_tag=MSMEG_1072;EC_number=3.5.3.11;note=identified%20by%20match%20to%20protein%20family%20HMM%20PF00491%3B%20match%20to%20protein%20family%20HMM%20TIGR01230;transl_table=11;product=agmatinase;protein_id=YP_885468.1;db_xref=GI:118469242;db_xref=GeneID:4535378;exon_number=1
+NC_008596.1	RefSeq	gene	3597069	3598112	.	+	.	ID=NC_008596.1:speB;locus_tag=MSMEG_3535;db_xref=GeneID:4533678
+NC_008596.1	RefSeq	CDS	3597069	3598109	.	+	0	ID=NC_008596.1:speB:unknown_transcript_2;Parent=NC_008596.1:speB;locus_tag=MSMEG_3535;EC_number=3.5.3.11;note=identified%20by%20match%20to%20protein%20family%20HMM%20PF00491%3B%20match%20to%20protein%20family%20HMM%20TIGR01230;transl_table=11;product=agmatinase;protein_id=YP_887838.1;db_xref=GI:118470943;db_xref=GeneID:4533678;exon_number=1
+NC_008596.1	RefSeq	start_codon	3597069	3597071	.	+	0	ID=NC_008596.1:speB:unknown_transcript_2;Parent=NC_008596.1:speB;locus_tag=MSMEG_3535;EC_number=3.5.3.11;note=identified%20by%20match%20to%20protein%20family%20HMM%20PF00491%3B%20match%20to%20protein%20family%20HMM%20TIGR01230;transl_table=11;product=agmatinase;protein_id=YP_887838.1;db_xref=GI:118470943;db_xref=GeneID:4533678;exon_number=1
+NC_008596.1	RefSeq	stop_codon	3598110	3598112	.	+	0	ID=NC_008596.1:speB:unknown_transcript_2;Parent=NC_008596.1:speB;locus_tag=MSMEG_3535;EC_number=3.5.3.11;note=identified%20by%20match%20to%20protein%20family%20HMM%20PF00491%3B%20match%20to%20protein%20family%20HMM%20TIGR01230;transl_table=11;product=agmatinase;protein_id=YP_887838.1;db_xref=GI:118470943;db_xref=GeneID:4533678;exon_number=1
+NC_008596.1	RefSeq	gene	4460713	4461672	.	-	.	ID=NC_008596.1:speB;locus_tag=MSMEG_4374;db_xref=GeneID:4535424
+NC_008596.1	RefSeq	CDS	4460716	4461672	.	-	0	ID=NC_008596.1:speB:unknown_transcript_3;Parent=NC_008596.1:speB;locus_tag=MSMEG_4374;EC_number=3.5.3.11;note=identified%20by%20match%20to%20protein%20family%20HMM%20PF00491%3B%20match%20to%20protein%20family%20HMM%20TIGR01230;transl_table=11;product=agmatinase;protein_id=YP_888649.1;db_xref=GI:118469662;db_xref=GeneID:4535424;exon_number=1
+NC_008596.1	RefSeq	start_codon	4461670	4461672	.	-	0	ID=NC_008596.1:speB:unknown_transcript_3;Parent=NC_008596.1:speB;locus_tag=MSMEG_4374;EC_number=3.5.3.11;note=identified%20by%20match%20to%20protein%20family%20HMM%20PF00491%3B%20match%20to%20protein%20family%20HMM%20TIGR01230;transl_table=11;product=agmatinase;protein_id=YP_888649.1;db_xref=GI:118469662;db_xref=GeneID:4535424;exon_number=1
+NC_008596.1	RefSeq	stop_codon	4460713	4460715	.	-	0	ID=NC_008596.1:speB:unknown_transcript_3;Parent=NC_008596.1:speB;locus_tag=MSMEG_4374;EC_number=3.5.3.11;note=identified%20by%20match%20to%20protein%20family%20HMM%20PF00491%3B%20match%20to%20protein%20family%20HMM%20TIGR01230;transl_table=11;product=agmatinase;protein_id=YP_888649.1;db_xref=GI:118469662;db_xref=GeneID:4535424;exon_number=1
+NC_008596.1	RefSeq	gene	4539385	4540344	.	+	.	ID=NC_008596.1:speB;locus_tag=MSMEG_4459;db_xref=GeneID:4537057
+NC_008596.1	RefSeq	CDS	4539385	4540341	.	+	0	ID=NC_008596.1:speB:unknown_transcript_4;Parent=NC_008596.1:speB;locus_tag=MSMEG_4459;EC_number=3.5.3.11;note=identified%20by%20match%20to%20protein%20family%20HMM%20PF00491%3B%20match%20to%20protein%20family%20HMM%20TIGR01230;transl_table=11;product=agmatinase;protein_id=YP_888732.1;db_xref=GI:118472833;db_xref=GeneID:4537057;exon_number=1
+NC_008596.1	RefSeq	start_codon	4539385	4539387	.	+	0	ID=NC_008596.1:speB:unknown_transcript_4;Parent=NC_008596.1:speB;locus_tag=MSMEG_4459;EC_number=3.5.3.11;note=identified%20by%20match%20to%20protein%20family%20HMM%20PF00491%3B%20match%20to%20protein%20family%20HMM%20TIGR01230;transl_table=11;product=agmatinase;protein_id=YP_888732.1;db_xref=GI:118472833;db_xref=GeneID:4537057;exon_number=1
+NC_008596.1	RefSeq	stop_codon	4540342	4540344	.	+	0	ID=NC_008596.1:speB:unknown_transcript_4;Parent=NC_008596.1:speB;locus_tag=MSMEG_4459;EC_number=3.5.3.11;note=identified%20by%20match%20to%20protein%20family%20HMM%20PF00491%3B%20match%20to%20protein%20family%20HMM%20TIGR01230;transl_table=11;product=agmatinase;protein_id=YP_888732.1;db_xref=GI:118472833;db_xref=GeneID:4537057;exon_number=1


=====================================
Tests/GFF/problem_sequence_region.gff3
=====================================
@@ -0,0 +1,7 @@
+##gff-version 3
+#!gff-spec-version 1.21
+#!processor NCBI annotwriter
+##sequence-region  1 2482535
+##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=1282
+1	Local	region	1	2482535	.	+	.	ID=1:1..2482535;Dbxref=taxon:1282;Is_circular=true;Name=ANONYMOUS;gbkey=Src;genome=chromosome;mol_type=genomic DNA
+1	.	gene	1	1356	.	+	.	ID=gene-test_000001;Name=dnaA;gbkey=Gene;gene=dnaA;gene_biotype=protein_coding


=====================================
Tests/GFF/spaces.gff3
=====================================
@@ -0,0 +1,5 @@
+##gff-version 3
+contig1	.	gene	1544	2057	.	-	.	ID=contig1.1
+contig1	.	mRNA	1544	2057	.	-	.	ID=mRNA.contig1.1;Parent=contig1.1   
+contig1	.	mRNA	1544	2057	.	-	.	 foo=bar;ID=mRNA.contig1.1;Parent=contig1.1   
+contig1	.	mRNA	1544	2057	.	-	.	ID=mRNA.contig1.1;Parent=contig1.1; foo=bar


=====================================
Tests/GFF/trans_splicing.gff3
=====================================
@@ -0,0 +1,11 @@
+1	manual	gene	9559	9672	.	+	.	ID=gene83;Name=rps12|lcl|NC_021456.1_cdsid_YP_008082803.1_8-gene;exception=trans-splicing
+1	manual	gene	112442	113241	.	+	.	ID=gene84;Name=rps12|lcl|NC_021456.1_cdsid_YP_008082803.1_8-gene;exception=trans-splicing
+1	manual	mRNA	9559	9672	.	+	.	ID=mRNA43;Parent=gene83,gene84;Name=rps12|lcl|NC_021456.1_cdsid_YP_008082803.1_8;exception=trans-splicing
+1	manual	mRNA	112442	113241	.	+	.	ID=mRNA43;Parent=gene83,gene84;Name=rps12|lcl|NC_021456.1_cdsid_YP_008082803.1_8;exception=trans-splicing
+1	manual	exon	9559	9672	.	+	.	Parent=mRNA43
+1	manual	CDS	9559	9672	.	+	0	Parent=mRNA43
+1	manual	exon	112442	112673	.	+	.	Parent=mRNA43
+1	manual	CDS	112442	112673	.	+	0	Parent=mRNA43
+1	manual	intron	112674	113215	.	+	.	Parent=mRNA43
+1	manual	exon	113216	113241	.	+	.	Parent=mRNA43
+1	manual	CDS	113216	113241	.	+	2	Parent=mRNA43


=====================================
Tests/GFF/transcripts.gff3
=====================================
@@ -0,0 +1,18 @@
+##gff-version 3
+##date 2013-11-13
+edit_test.fa	.	gene	500	2610	.	+	.	ID=newGene
+edit_test.fa	.	mRNA	500	2385	.	+	.	Parent=newGene;Namo=reinhard+did+this;Name=t1%28newGene%29;ID=t1;uri=http%3A//www.yahoo.com
+edit_test.fa	.	five_prime_UTR	500	802	.	+	.	Parent=t1
+edit_test.fa	.	CDS	803	1012	.	+	.	Parent=t1
+edit_test.fa	.	three_prime_UTR	1013	1168	.	+	.	Parent=t1
+edit_test.fa	.	three_prime_UTR	1475	1654	.	+	.	Parent=t1
+edit_test.fa	.	three_prime_UTR	1720	1908	.	+	.	Parent=t1
+edit_test.fa	.	three_prime_UTR	2047	2385	.	+	.	Parent=t1
+edit_test.fa	.	mRNA	1050	2610	.	+	.	Parent=newGene;Name=t2%28newGene%29;ID=t2
+edit_test.fa	.	CDS	1050	1196	.	+	.	Parent=t2
+edit_test.fa	.	CDS	1472	1651	.	+	.	Parent=t2
+edit_test.fa	.	CDS	1732	2610	.	+	.	Parent=t2
+edit_test.fa	.	mRNA	1050	2610	.	+	.	Parent=newGene;Name=t3%28newGene%29;ID=t3
+edit_test.fa	.	CDS	1050	1196	.	+	.	Parent=t3
+edit_test.fa	.	CDS	1472	1651	.	+	.	Parent=t3
+edit_test.fa	.	CDS	1732	2610	.	+	.	Parent=t3


=====================================
Tests/GFF/unescaped-semicolon.gff3
=====================================
@@ -0,0 +1,2 @@
+##gff-version 3
+chr1	.	gene	1	100	.	+	.	ID=PH01000020G1780;Description="osFTL6 FT-Like6 homologous to Flowering Locus T gene; contains Pfam profile PF01161: Phosphatidylethanolamine-binding protein, expressed"
\ No newline at end of file


=====================================
Tests/GFF/wormbase_gff2.txt
=====================================
@@ -0,0 +1,63 @@
+I	Genomic_canonical	region	1	2679	.	+	.	Sequence "cTel33B" ; Note "Clone cTel33B; Genbank AC199162" ; Note "Clone cTel33B; Genbank AC199162"
+I	Coding_transcript	Transcript	12759582	12764949	.	-	.	Transcript "B0019.1" ; WormPep "WP:CE40797" ; Note "amx-2" ; Prediction_status "Partially_confirmed" ; Gene "WBGene00000138" ; CDS "B0019.1" ; WormPep "WP:CE40797" ; Note "amx-2" ; Prediction_status "Partially_confirmed" ; Gene "WBGene00000138"
+I	Coding_transcript	intron	12759829	12759948	.	-	.	Transcript "B0019.1" ; Confirmed_EST EC034652
+I	Coding_transcript	intron	12760014	12760226	.	-	.	Transcript "B0019.1" ; Confirmed_EST EC034652
+I	Coding_transcript	intron	12760320	12760364	.	-	.	Transcript "B0019.1" ; Confirmed_EST yk1054h04.3
+I	Coding_transcript	intron	12760495	12760833	.	-	.	Transcript "B0019.1" ; Confirmed_EST EC027594
+I	Coding_transcript	intron	12760905	12761171	.	-	.	Transcript "B0019.1" ; Confirmed_EST EC027594
+I	Coding_transcript	intron	12761517	12761798	.	-	.	Transcript "B0019.1" ; Confirmed_EST EC027594
+I	Coding_transcript	intron	12761954	12762126	.	-	.	Transcript "B0019.1" ; Confirmed_EST yk262g9.5
+I	Coding_transcript	intron	12762269	12762647	.	-	.	Transcript "B0019.1" ; Confirmed_EST yk262g9.5
+I	Coding_transcript	intron	12762807	12763111	.	-	.	Transcript "B0019.1" ; Confirmed_EST yk1056c07.5
+I	Coding_transcript	intron	12763250	12763447	.	-	.	Transcript "B0019.1" ; Confirmed_EST yk1056c07.5
+I	Coding_transcript	intron	12763656	12763728	.	-	.	Transcript "B0019.1" ; Confirmed_EST yk1056c07.5
+I	Coding_transcript	intron	12763883	12763978	.	-	.	Transcript "B0019.1" ; Confirmed_EST yk1054h04.5 ; Confirmed_EST OSTF088D9_1
+I	Coding_transcript	intron	12764103	12764290	.	-	.	Transcript "B0019.1" ; Confirmed_EST yk1054h04.5 ; Confirmed_EST OSTF088D9_1
+I	Coding_transcript	intron	12764472	12764811	.	-	.	Transcript "B0019.1" ; Confirmed_EST yk1054h04.5 ; Confirmed_EST OSTF088D9_1
+I	Coding_transcript	exon	12759582	12759828	.	-	.	Transcript "B0019.1"
+I	Coding_transcript	exon	12759949	12760013	.	-	.	Transcript "B0019.1"
+I	Coding_transcript	exon	12760227	12760319	.	-	.	Transcript "B0019.1"
+I	Coding_transcript	exon	12760365	12760494	.	-	.	Transcript "B0019.1"
+I	Coding_transcript	exon	12760834	12760904	.	-	.	Transcript "B0019.1"
+I	Coding_transcript	exon	12761172	12761516	.	-	.	Transcript "B0019.1"
+I	Coding_transcript	exon	12761799	12761953	.	-	.	Transcript "B0019.1"
+I	Coding_transcript	exon	12762127	12762268	.	-	.	Transcript "B0019.1"
+I	Coding_transcript	exon	12762648	12762806	.	-	.	Transcript "B0019.1"
+I	Coding_transcript	exon	12763112	12763249	.	-	.	Transcript "B0019.1"
+I	Coding_transcript	exon	12763448	12763655	.	-	.	Transcript "B0019.1"
+I	Coding_transcript	exon	12763729	12763882	.	-	.	Transcript "B0019.1"
+I	Coding_transcript	exon	12763979	12764102	.	-	.	Transcript "B0019.1"
+I	Coding_transcript	exon	12764291	12764471	.	-	.	Transcript "B0019.1"
+I	Coding_transcript	exon	12764812	12764949	.	-	.	Transcript "B0019.1"
+I	SAGE_tag_unambiguously_mapped	SAGE_tag	12761492	12761512	.	-	.	Sequence SAGE:aacggagccgtacacgc;count 5;Gene amx-2;Transcript B0019.1
+I	SAGE_tag_most_three_prime	SAGE_tag	12761499	12761512	.	-	.	Sequence SAGE:aacggagccg;count 9;Gene amx-2;Transcript B0019.1
+I	mass_spec_genome	translated_nucleotide_match	12761920	12761953	.	-	.	Target "Mass_spec_peptide:MSP:FADFSPLDVSDVNFATDDLAK" 10 21 ; Note "MSP:FADFSPLDVSDVNFATDDLAK" ; Protein_matches "WP:CE40797" ; CDS_matches "B0019.1" ; Times_observed "3"
+I	mass_spec_genome	translated_nucleotide_match	12762127	12762155	.	-	.	Target "Mass_spec_peptide:MSP:FADFSPLDVSDVNFATDDLAK" 1 10 ; Note "MSP:FADFSPLDVSDVNFATDDLAK" ; Protein_matches "WP:CE40797" ; CDS_matches "B0019.1" ; Times_observed "3"
+I	mass_spec_genome	translated_nucleotide_match	12763506	12763559	.	-	.	Target "Mass_spec_peptide:MSP:FGHGQSLLAQGGMNEVVR" 1 18 ; Note "MSP:FGHGQSLLAQGGMNEVVR" ; Protein_matches "WP:CE40797" ; CDS_matches "B0019.1" ; Times_observed "1"
+I	SAGE_tag_unambiguously_mapped	SAGE_tag	12763533	12763553	.	-	.	Sequence SAGE:ggcagagtcttttggca;count 1;Gene amx-2;Transcript B0019.1
+I	mass_spec_genome	translated_nucleotide_match	12764361	12764411	.	-	.	Target "Mass_spec_peptide:MSP:NIQQNRPGLSVLVLEAR" 1 17 ; Note "MSP:NIQQNRPGLSVLVLEAR" ; Protein_matches "WP:CE40797" ; CDS_matches "B0019.1" ; Times_observed "2"
+I	GenePair_STS	PCR_product	12762449	12764118	.	+	.	PCR_product "sjj_B0019.1"
+I	Expr_profile	experimental_result_region	12762449	12764118	.	+	.	Expr_profile "B0019.1"
+I	Allele	SNP	12764272	12764272	.	+	.	Variation "snp_B0019[1]" ; Interpolated_map_position "14.003" ; ; RFLP "No"
+I	Promoterome	PCR_product	12764938	12766937	.	+	.	PCR_product "p_B0019.1_93"
+I	Oligo_set	reagent	12759745	12761589	.	-	.	Oligo_set "Aff_B0019.1"
+I	Orfeome	PCR_product	12759747	12764936	.	-	.	PCR_product "mv_B0019.1" ; Amplified 1 ; Amplified 1
+I	Coding_transcript	three_prime_UTR	12759582	12759744	.	-	.	Transcript "B0019.1"
+I	Coding_transcript	coding_exon	12759745	12759828	.	-	0	Transcript "B0019.1" ; CDS "B0019.1"
+I	Coding_transcript	coding_exon	12759949	12760013	.	-	2	Transcript "B0019.1" ; CDS "B0019.1"
+I	Coding_transcript	coding_exon	12760227	12760319	.	-	2	Transcript "B0019.1" ; CDS "B0019.1"
+I	Coding_transcript	coding_exon	12760365	12760494	.	-	0	Transcript "B0019.1" ; CDS "B0019.1"
+I	Coding_transcript	coding_exon	12760834	12760904	.	-	2	Transcript "B0019.1" ; CDS "B0019.1"
+I	Coding_transcript	coding_exon	12761172	12761516	.	-	2	Transcript "B0019.1" ; CDS "B0019.1"
+I	Coding_transcript	coding_exon	12761799	12761953	.	-	1	Transcript "B0019.1" ; CDS "B0019.1"
+I	Coding_transcript	coding_exon	12762127	12762268	.	-	2	Transcript "B0019.1" ; CDS "B0019.1"
+I	Coding_transcript	coding_exon	12762648	12762806	.	-	2	Transcript "B0019.1" ; CDS "B0019.1"
+I	Coding_transcript	coding_exon	12763112	12763249	.	-	2	Transcript "B0019.1" ; CDS "B0019.1"
+I	Coding_transcript	coding_exon	12763448	12763655	.	-	0	Transcript "B0019.1" ; CDS "B0019.1"
+I	Coding_transcript	coding_exon	12763729	12763882	.	-	1	Transcript "B0019.1" ; CDS "B0019.1"
+I	Coding_transcript	coding_exon	12763979	12764102	.	-	2	Transcript "B0019.1" ; CDS "B0019.1"
+I	Coding_transcript	coding_exon	12764291	12764471	.	-	0	Transcript "B0019.1" ; CDS "B0019.1"
+I	Coding_transcript	five_prime_UTR	12764938	12764949	.	-	.	Transcript "B0019.1"
+I	Coding_transcript	coding_exon	12764812	12764937	.	-	0	Transcript "B0019.1" ; CDS "B0019.1"
+X	SAGE_tag	SAGE_tag	6819353	6819366	.	+	.	Sequence SAGE:aacggagccg;count 9;Gene amx-2;Transcript B0019.1
+X 	gene 	processed_transcript 	944828 	948883 	.	-	.    	Gene "WBGene00004893"


=====================================
Tests/GFF/wormbase_gff2_alt.txt
=====================================
@@ -0,0 +1,9 @@
+Remanei_genome	Genomic_canonical	region	1	7816	.	+	.	Sequence "Contig1020";
+Contig102	WU_MERGED	CDS	1629	3377	.	-	.	CDS "cr01.sctg102.wum.2.1"
+Contig102	WU_MERGED	coding_exon	2927	3377	.	-	.	CDS "cr01.sctg102.wum.2.1"
+Contig102	WU_MERGED	coding_exon	2474	2875	.	-	.	CDS "cr01.sctg102.wum.2.1"
+Contig102	WU_MERGED	coding_exon	1928	2430	.	-	.	CDS "cr01.sctg102.wum.2.1"
+Contig102	WU_MERGED	coding_exon	1629	1883	.	-	.	CDS "cr01.sctg102.wum.2.1"
+Contig102	WU_MERGED	intron	2876	2926	.	-	.	CDS "cr01.sctg102.wum.2.1"
+Contig102	WU_MERGED	intron	2431	2473	.	-	.	CDS "cr01.sctg102.wum.2.1"
+Contig102	WU_MERGED	intron	1884	1927	.	-	.	CDS "cr01.sctg102.wum.2.1"


=====================================
Tests/test_GFFSeqIOFeatureAdder.py
=====================================
@@ -0,0 +1,684 @@
+"""Test decoration of existing SeqRecords with GFF through a SeqIO interface.
+"""
+import sys
+import os
+import unittest
+import pprint
+
+import six
+from six import StringIO
+
+from Bio import SeqIO
+from BCBio import GFF
+from Bio.Seq import Seq
+from Bio.SeqRecord import SeqRecord
+from Bio.SeqFeature import SeqFeature, FeatureLocation
+from BCBio.GFF import (GFFExaminer, GFFParser, DiscoGFFParser)
+
+
+class MapReduceGFFTest(unittest.TestCase):
+    """Tests GFF parsing using a map-reduce framework for parallelization.
+    """
+
+    def setUp(self):
+        self._test_dir = os.path.join(os.path.dirname(__file__), "GFF")
+        self._test_gff_file = os.path.join(self._test_dir, "c_elegans_WS199_shortened_gff.txt")
+        self._disco_host = "http://localhost:7000"  # handed to DiscoGFFParser below
+
+    def t_local_map_reduce(self):
+        """General map reduce framework without parallelization.
+        """
+        cds_limit_info = dict(gff_type=["gene", "mRNA", "CDS"], gff_id=['I'])
+        rec_dict = SeqIO.to_dict(GFF.parse(self._test_gff_file, limit_info=cds_limit_info))
+        test_rec = rec_dict['I']
+        assert len(test_rec.features) == 32
+
+    def t_disco_map_reduce(self):
+        """Map reduce framework parallelized using disco.
+        """
+        # disco and simplejson are optional: when they are missing the test is
+        # reported as skipped instead of silently counting as a pass.
+        try:
+            import disco
+            import simplejson
+        except ImportError:
+            self.skipTest("Skipping -- disco and json not found")
+        cds_limit_info = dict(
+            gff_source_type=[('Non_coding_transcript', 'gene'), ('Coding_transcript', 'gene'),
+                             ('Coding_transcript', 'mRNA'), ('Coding_transcript', 'CDS')],
+            gff_id=['I']
+        )
+        parser = DiscoGFFParser(disco_host=self._disco_host)
+        rec_dict = SeqIO.to_dict(parser.parse(self._test_gff_file, limit_info=cds_limit_info))
+        final_rec = rec_dict['I']
+        # only record 'I' and the (source, type) pairs above are requested
+        assert len(final_rec.features) == 2  # two gene features
+
+
+class GFF3Test(unittest.TestCase):
+    """Real live GFF3 tests from WormBase and NCBI.
+
+    Uses GFF3 data from:
+
+    ftp://ftp.wormbase.org/pub/wormbase/genomes/c_elegans/
+    genome_feature_tables/GFF3/
+    ftp://ftp.wormbase.org/pub/wormbase/genomes/c_elegans/sequences/dna/
+
+    and from NCBI.
+    """
+
+    def setUp(self):
+        self._test_dir = os.path.join(os.path.dirname(__file__), "GFF")
+        self._test_seq_file = os.path.join(self._test_dir, "c_elegans_WS199_dna_shortened.fa")
+        self._test_gff_file = os.path.join(self._test_dir, "c_elegans_WS199_shortened_gff.txt")
+        self._test_gff_ann_file = os.path.join(self._test_dir, "c_elegans_WS199_ann_gff.txt")
+        self._full_dir = "/usr/home/chapmanb/mgh/ruvkun_rnai/wormbase/" + \
+                "data_files_WS198"
+        self._test_ncbi = os.path.join(self._test_dir, "ncbi_gff3.txt")
+
+    def not_t_full_celegans(self):
+        """Test the full C elegans chromosome and GFF files.
+
+        This is used to test GFF on large files and is not run as a standard
+        test. You will need to download the files and adjust the paths
+        to run this.
+        """
+        # read the sequence information; the handle is closed even on errors
+        seq_file = os.path.join(self._full_dir, "c_elegans.WS199.dna.fa")
+        gff_file = os.path.join(self._full_dir, "c_elegans.WS199.gff3")
+        with open(seq_file) as seq_handle:
+            seq_dict = SeqIO.to_dict(SeqIO.parse(seq_handle, "fasta"))
+        # To see which limits a file offers, use GFFExaminer as t_possible_limits does:
+        #     possible_limits = GFFExaminer().available_limits(gff_file)
+        #     pprint.pprint(possible_limits)
+        # restrict parsing to the RNAi and gene (source, type) pairs below
+        rnai_types = [('Orfeome', 'PCR_product'), ('GenePair_STS', 'PCR_product'), ('Promoterome', 'PCR_product')]
+        gene_types = [('Non_coding_transcript', 'gene'), ('Coding_transcript', 'gene'), ('Coding_transcript', 'mRNA'),
+                      ('Coding_transcript', 'CDS')]
+        limit_info = dict(gff_source_type=rnai_types + gene_types)
+        for rec in GFF.parse(gff_file, seq_dict, limit_info=limit_info):
+            pass
+
+    def _get_seq_dict(self):
+        """Return the test FASTA file as a dictionary built by SeqIO.to_dict.
+        """
+        # the context manager closes the handle even if parsing raises
+        with open(self._test_seq_file) as seq_handle:
+            seq_dict = SeqIO.to_dict(SeqIO.parse(seq_handle, "fasta"))
+        return seq_dict
+
+    def t_possible_limits(self):
+        """Calculate possible queries to limit a GFF file.
+        """
+        gff_examiner = GFFExaminer()
+        possible_limits = gff_examiner.available_limits(self._test_gff_file)
+        print()
+        pprint.pprint(possible_limits)
+
+    def t_parent_child(self):
+        """Summarize parent-child relationships in a GFF file.
+        """
+        gff_examiner = GFFExaminer()
+        pc_map = gff_examiner.parent_child_map(self._test_gff_file)
+        print()
+        pprint.pprint(pc_map)
+
+    def t_parent_child_file_modes(self):
+        """Parent-child map from open handles: text mode works, binary mode fails on Python 3.
+        """
+        gff_examiner = GFFExaminer()
+        # Use the loaded-from-filename as reference
+        pc_map = gff_examiner.parent_child_map(self._test_gff_file)
+
+        with open(self._test_gff_file, "rt") as handle:
+            assert pc_map == gff_examiner.parent_child_map(handle)
+
+        with open(self._test_gff_file, "rb") as handle:
+            if six.PY2:
+                assert pc_map == gff_examiner.parent_child_map(handle)
+            else:
+                try:
+                    gff_examiner.parent_child_map(handle)
+                except TypeError as e:
+                    assert str(e) == "input handle must be opened in text mode", e
+                else:
+                    assert False, "expected TypeError to be raised"
+
+    def t_flat_features(self):
+        """Check addition of flat non-nested features to multiple records.
+        """
+        seq_dict = self._get_seq_dict()
+        pcr_limit_info = dict(
+            gff_source_type=[('Orfeome', 'PCR_product'), ('GenePair_STS',
+                                                          'PCR_product'), ('Promoterome', 'PCR_product')]
+        )
+        parser = GFFParser()
+        rec_dict = SeqIO.to_dict(parser.parse(self._test_gff_file, seq_dict, limit_info=pcr_limit_info))
+        assert len(rec_dict['I'].features) == 4
+        assert len(rec_dict['X'].features) == 5
+
+    def t_nested_features(self):
+        """Check three-deep nesting of features with gene, mRNA and CDS.
+        """
+        seq_dict = self._get_seq_dict()
+        cds_limit_info = dict(
+            gff_source_type=[('Coding_transcript', 'gene'), ('Coding_transcript', 'mRNA'),
+                             ('Coding_transcript', 'CDS')],
+            gff_id=['I']
+        )
+        parser = GFFParser()
+        rec_dict = SeqIO.to_dict(parser.parse(self._test_gff_file, seq_dict, limit_info=cds_limit_info))
+        final_rec = rec_dict['I']
+        # first gene feature is plain
+        assert len(final_rec.features) == 2  # two gene features
+        assert len(final_rec.features[0].sub_features) == 1  # one transcript
+        # 15 final CDS regions
+        assert len(final_rec.features[0].sub_features[0].sub_features) == 15
+
+    def t_nested_multiparent_features(self):
+        """Verify correct nesting of features with multiple parents.
+        """
+        seq_dict = self._get_seq_dict()
+        cds_limit_info = dict(
+            gff_source_type=[('Coding_transcript', 'gene'), ('Coding_transcript', 'mRNA'),
+                             ('Coding_transcript', 'CDS')],
+            gff_id=['I']
+        )
+        parser = GFFParser()
+        rec_dict = SeqIO.to_dict(parser.parse(self._test_gff_file, seq_dict, limit_info=cds_limit_info))
+        final_rec = rec_dict['I']
+        # second gene feature is multi-parent
+        assert len(final_rec.features) == 2  # two gene features
+        cur_subs = final_rec.features[1].sub_features
+        assert len(cur_subs) == 3  # three transcripts
+        # the first and second transcript have the same CDSs
+        assert len(cur_subs[0].sub_features) == 6
+        assert len(cur_subs[1].sub_features) == 6
+        assert cur_subs[0].sub_features[0] is cur_subs[1].sub_features[0]
+
+    def t_no_dict_error(self):
+        """Ensure an error is raised when no dictionary to map to is present.
+        """
+        parser = GFFParser(create_missing=False)
+        try:
+            for rec in parser.parse(self._test_gff_file):
+                pass
+            # no error -- problem
+            raise AssertionError('Did not complain with missing dictionary')
+        except KeyError:
+            pass
+
+    def t_unknown_seq(self):
+        """Prepare unknown base sequences with the correct length.
+        """
+        rec_dict = SeqIO.to_dict(GFF.parse(self._test_gff_file))
+        assert len(rec_dict["I"].seq) == 12766937
+        assert len(rec_dict["X"].seq) == 17718531
+
+    def t_gff_annotations(self):
+        """Check GFF annotations placed on an entire sequence.
+        """
+        parser = GFFParser()
+        rec_dict = SeqIO.to_dict(parser.parse(self._test_gff_ann_file))
+        final_rec = rec_dict['I']
+        assert len(final_rec.annotations.keys()) == 2
+        assert final_rec.annotations['source'] == ['Expr_profile']
+        assert final_rec.annotations['expr_profile'] == ['B0019.1']
+
+    def t_gff3_iterator(self):
+        """Iterated parsing in GFF3 files with nested features.
+        """
+        parser = GFFParser()
+        recs = [r for r in parser.parse_in_parts(self._test_gff_file, target_lines=70)]
+        # NOTE(review): an older remark here expected "one big set", yet six records are asserted -- confirm
+        assert len(recs) == 6
+        assert len(recs[0].features) == 59
+
+    def t_gff3_iterator_limit(self):
+        """Iterated interface using a limit query on GFF3 files.
+        """
+        cds_limit_info = dict(
+            gff_source_type=[('Coding_transcript', 'gene'), ('Coding_transcript', 'mRNA'),
+                             ('Coding_transcript', 'CDS')],
+            gff_id=['I']
+        )
+        parser = GFFParser()
+        rec_dict = SeqIO.to_dict(parser.parse(self._test_gff_file, limit_info=cds_limit_info))
+        assert len(rec_dict) == 1
+        tfeature = rec_dict["I"].features[0].sub_features[0]
+        for sub_test in tfeature.sub_features:
+            assert sub_test.type == "CDS", sub_test
+
+    def t_gff3_noval_attrib(self):
+        """Parse GFF3 file from NCBI with a key/value pair with no value.
+        """
+        parser = GFFParser()
+        rec_dict = SeqIO.to_dict(parser.parse(self._test_ncbi))
+        assert len(rec_dict) == 1
+        t_feature = list(rec_dict.values())[0].features[0]
+        assert t_feature.qualifiers["pseudo"] == ["true"]
+
+    def t_gff3_multiple_ids(self):
+        """Deal with GFF3 with non-unique ID attributes, using NCBI example.
+        """
+        parser = GFFParser()
+        rec_dict = SeqIO.to_dict(parser.parse(self._test_ncbi))
+        assert len(rec_dict) == 1
+        t_features = list(rec_dict.values())[0].features[1:]
+        # 4 feature sets, same ID, different positions, different attributes
+        assert len(t_features) == 4
+        for f in t_features:
+            assert len(f.sub_features) == 3
+
+    def t_simple_parsing(self):
+        """Parse GFF into a simple line by line dictionary without nesting.
+        """
+        parser = GFFParser()
+        num_lines = 0
+        for line_info in parser.parse_simple(self._test_gff_file):
+            num_lines += 1
+        assert num_lines == 177, num_lines
+        line_info = line_info['child'][0]
+        assert line_info['quals']['confirmed_est'] == \
+                ['yk1055g06.5', 'OSTF085G5_1']
+        assert line_info['location'] == [4582718, 4583189]
+
+    def t_simple_parsing_nesting(self):
+        """Simple parsing for lines with nesting, using the simplified API.
+        """
+        test_gff = os.path.join(self._test_dir, "transcripts.gff3")
+        num_lines = 0
+        for line_info in GFF.parse_simple(test_gff):
+            num_lines += 1
+        assert num_lines == 16, num_lines
+
+    def t_extra_comma(self):
+        """Correctly handle GFF3 files with extra trailing commas.
+        """
+        tfile = os.path.join(self._test_dir, "mouse_extra_comma.gff3")
+        # keep the last parsed record; the handle is closed even if parsing raises
+        with open(tfile) as in_handle:
+            for rec in GFF.parse(in_handle):
+                pass
+        tested = False
+        for sub_top in rec.features[0].sub_features:
+            for sub in sub_top.sub_features:
+                if sub.qualifiers.get("Name", "") == ["CDS:NC_000083.5:LOC100040603"]:
+                    tested = True
+                    assert len(sub.qualifiers["Parent"]) == 1
+        assert tested, "Did not find sub-feature to test"
+
+    def t_novalue_key(self):
+        """Handle GFF3 files with keys and no values.
+        """
+        tfile = os.path.join(self._test_dir, "glimmer_nokeyval.gff3")
+        rec = six.next(GFF.parse(tfile))
+        f1, f2 = rec.features
+        assert f1.qualifiers['ID'] == ['GL0000006']
+        assert len(f1.sub_features) == 2
+        assert f1.sub_features[0].qualifiers["Lack 3'-end"] == ["true"]
+        assert "ID" not in f1.sub_features[0].qualifiers
+        assert f2.qualifiers["Complete"] == ["true"]
+
+    def t_key_whitespace(self):
+        """Fix keys with problematic whitespace.
+        """
+        tfile = os.path.join(self._test_dir, "spaces.gff3")
+        for i, line_info in enumerate(GFF.parse_simple(tfile)):
+            if i > 2:
+                assert line_info["quals"]["foo"] == ["bar"]
+
+    def t_trans_spliicing(self):
+        """Parsing of transspliced genes from GFF3 spec where child locations don't match to parents.
+        """
+        fname = os.path.join(self._test_dir, "trans_splicing.gff3")
+        with open(fname) as in_handle:
+            rec = six.next(GFF.parse(in_handle))
+            assert len(rec.features) == 2
+            assert rec.features[0].id == "gene83"
+            assert len(rec.features[0].sub_features) == 2
+            assert len(rec.features[0].sub_features[0].sub_features) == 7
+
+            assert rec.features[1].id == "gene84"
+            assert len(rec.features[1].sub_features) == 2
+            assert len(rec.features[1].sub_features[0].sub_features) == 7
+
+
+class SolidGFFTester(unittest.TestCase):
+    """Parse GFF3 output produced by SOLiD analysis.
+
+    Background on the SOLiD GFF flavour:
+
+    http://solidsoftwaretools.com/gf/project/matogff/
+    """
+
+    def setUp(self):
+        gff_dir = os.path.join(os.path.dirname(__file__), "GFF")
+        self._test_dir, self._test_gff_file = gff_dir, os.path.join(gff_dir, "F3-unique-3.v2.gff")
+
+    def t_basic_solid_parse(self):
+        """Check location, strand, type and qualifiers of one SOLiD read feature."""
+        records = SeqIO.to_dict(GFFParser().parse(self._test_gff_file))
+        feature = records['3_341_424_F3'].features[0]
+        quals = feature.qualifiers
+        where = feature.location
+        assert where.nofuzzy_start == 102716
+        assert where.nofuzzy_end == 102736
+        assert len(quals) == 7
+        assert quals['score'] == ['10.6']
+        assert quals['source'] == ['solid']
+        assert feature.strand == -1
+        assert feature.type == 'read'
+        assert quals['g'] == ['T2203031313223113212']
+        assert len(quals['q']) == 20
+
+    def t_solid_iterator(self):
+        """Parse a flat, un-nested file in parts of five target lines.
+        """
+        parts = GFFParser().parse_in_parts(self._test_gff_file, target_lines=5)
+        counts = [len(part.features) for part in parts]
+        # one entry per yielded record ...
+        assert len(counts) == 112
+        # ... and no record carries more than a single feature
+        assert max(counts) == 1
+
+    def t_line_adjust(self):
+        """Rewrite parsed lines through a line_adjust_fn hook before records are built.
+        """
+
+        def use_index_as_rec_id(results):
+            quals = results['quals']
+            new_id, old_id = quals['i'][0], results['rec_id']
+            quals['read_name'] = [old_id]
+            results['rec_id'] = new_id
+            return results
+
+        recs = list(GFFParser(line_adjust_fn=use_index_as_rec_id).parse(self._test_gff_file))
+        assert len(recs) == 1
+        work_rec = recs[0]
+        assert work_rec.id == '1'
+        assert len(work_rec.features) == 112
+        # the original record id is kept as a qualifier on each feature
+        first_quals = work_rec.features[0].qualifiers
+        assert first_quals['read_name'] == ['3_336_815_F3']
+
+
+class GFF2Tester(unittest.TestCase):
+    """Build features from GFF2 and GTF input files.
+    """
+
+    def setUp(self):
+        self._test_dir = gff_dir = os.path.join(os.path.dirname(__file__), "GFF")
+        self._ensembl_file = os.path.join(gff_dir, "ensembl_gtf.txt")
+        self._wormbase_file = os.path.join(gff_dir, "wormbase_gff2.txt")
+        self._jgi_file = os.path.join(gff_dir, "jgi_gff2.txt")
+        self._wb_alt_file = os.path.join(gff_dir, "wormbase_gff2_alt.txt")
+
+    def t_basic_attributes(self):
+        """Check the qualifiers parsed from a single Ensembl GTF exon line.
+        """
+        only_snorna_exons = dict(gff_source_type=[('snoRNA', 'exon')])
+        records = SeqIO.to_dict(GFF.parse(self._ensembl_file, limit_info=only_snorna_exons))
+        features = records['I'].features
+        assert len(features) == 1
+        quals = features[0].qualifiers
+        # the complete set of qualifier names, in sorted order
+        assert sorted(quals.keys()) == [
+            'Parent', 'exon_number', 'gene_id', 'gene_name', 'source', 'transcript_id', 'transcript_name'
+        ]
+        # every qualifier value is a list of strings
+        assert quals['source'] == ['snoRNA']
+        assert quals['transcript_name'] == ['NR_001477.2']
+        assert quals['exon_number'] == ['1']
+
+    def t_tricky_semicolons(self):
+        """Semi-colons inside quoted WormBase GFF2 values stay part of the value.
+        """
+        regions_only = dict(gff_source_type=[('Genomic_canonical', 'region')])
+        records = SeqIO.to_dict(GFF.parse(self._wormbase_file, limit_info=regions_only))
+        features = records['I'].features
+        assert len(features) == 1
+        notes = features[0].qualifiers['Note']
+        expected = ['Clone cTel33B; Genbank AC199162', 'Clone cTel33B; Genbank AC199162']
+        assert notes == expected, notes
+
+    def t_unescaped_semicolons(self):
+        """Parse inputs with unescaped semi-colons.
+        This only guards against failing outright; it is a band-aid rather than
+        correct parsing, since the combined feature will not be maintained.
+        """
+        gff_path = os.path.join(self._test_dir, "unescaped-semicolon.gff3")
+        records = SeqIO.to_dict(GFF.parse(gff_path))
+        description = records['chr1'].features[0].qualifiers["Description"][0]
+        assert description.startswith('osFTL6')
+        assert description.endswith('protein, expressed')
+
+    def t_jgi_gff(self):
+        """Parse JGI formatted GFF2, which nests via transcriptId and proteinID.
+        """
+        records = SeqIO.to_dict(GFF.parse(self._jgi_file))
+        parent = records['chr_1'].features[0]
+        assert parent.location.nofuzzy_start == 37060
+        assert parent.location.nofuzzy_end == 38216
+        assert parent.type == 'inferred_parent'
+        assert len(parent.sub_features) == 6
+        child_quals = parent.sub_features[1].qualifiers
+        assert child_quals['proteinId'] == ['873']
+        assert child_quals['phase'] == ['0']
+
+    def t_ensembl_nested_features(self):
+        """Nest GFF2 features through their transcript_id.
+
+        XXX sub_features no longer supported in Biopython
+        """
+        chrom = SeqIO.to_dict(GFF.parse(self._ensembl_file))["I"]
+        assert len(chrom.features) == 2
+        first = chrom.features[0]
+        # disabled check: assert len(first.sub_features) == 32, len(first.sub_features)
+
+    def t_wormbase_nested_features(self):
+        """Nest GFF2 features that only carry a Transcript attribute.
+        """
+        records = SeqIO.to_dict(GFF.parse(self._wormbase_file))
+        assert len(records) == 3
+        chrom_features = records["I"].features
+        transcripts = [f for f in chrom_features if f.type == "Transcript"]
+        assert len(transcripts) == 1
+        assert not [f for f in chrom_features if f.type == "inferred_parent"]
+        tfeature = transcripts[0]
+        assert tfeature.qualifiers["WormPep"][0] == "WP:CE40797"
+        assert len(tfeature.sub_features) == 46
+
+    def t_wb_cds_nested_features(self):
+        """Nest GFF2 features that share a flat CDS key/value pair.
+        """
+        records = SeqIO.to_dict(GFF.parse(self._wb_alt_file))
+        assert len(records) == 2
+        first_features = next(iter(records.values())).features
+        assert len(first_features) == 1
+        cds = first_features[0]
+        assert cds.id == "cr01.sctg102.wum.2.1"
+        assert len(cds.sub_features) == 7
+
+    def t_gff2_iteration(self):
+        """Iterate over a GFF2 file in parts, breaking where features have no parents.
+        """
+        recs = list(GFF.parse(self._wormbase_file, target_lines=15))
+        assert len(recs) == 4
+        leading = recs[0].features
+        # the first yielded record holds two flat features and one nested one
+        assert leading[0].type == 'region'
+        assert leading[1].type == 'SAGE_tag'
+        assert len(leading[2].sub_features) == 29
+
+
+class DirectivesTest(unittest.TestCase):
+    """Checks on the parsing of GFF directives and other meta-data.
+    """
+
+    def setUp(self):
+        self._test_dir = gff_dir = os.path.join(os.path.dirname(__file__), "GFF")
+        self._gff_file = os.path.join(gff_dir, "hybrid1.gff3")
+        self._problem_seq_region_file = os.path.join(gff_dir, "problem_sequence_region.gff3")
+
+    def t_basic_directives(self):
+        """Top level GFF3 directives end up in the record annotations.
+        """
+        anns = SeqIO.to_dict(GFF.parse(self._gff_file))['chr17'].annotations
+        single_valued = [('gff-version', '3'), ('attribute-ontology', 'baz'),
+                         ('feature-ontology', 'bar'), ('source-ontology', 'boo')]
+        for directive, value in single_valued:
+            assert anns[directive] == [value]
+        # each sequence-region directive is expected as a 3-tuple
+        assert anns['sequence-region'] == [('foo', 0, 100), ('chr17', 62467933, 62469545)]
+
+    def t_fasta_directive(self):
+        """FASTA sequence embedded in a GFF3 file becomes the record sequence.
+        """
+        recs = SeqIO.to_dict(GFF.parse(self._gff_file))
+        assert len(recs) == 1
+        # the only record is chr17 and it carries the embedded sequence
+        assert str(recs['chr17'].seq) == "GATTACAGATTACA"
+
+    def t_examiner_with_fasta(self):
+        """Run the high level examiner on a file that has a FASTA directive.
+        """
+        examiner = GFFExaminer()
+        assert examiner.parent_child_map(self._gff_file)[('UCSC', 'mRNA')] == [('UCSC', 'CDS')]
+        limits = examiner.available_limits(self._gff_file)
+        first_id = list(limits['gff_id'].keys())[0]
+        assert first_id[0] == 'chr17'
+        source_types = sorted(limits['gff_source_type'].keys())
+        assert source_types == [('UCSC', 'CDS'), ('UCSC', 'mRNA')]
+
+    def t_problem_sequence_region(self):
+        """Sequence region directives without a contig name still parse.
+        """
+        anns = SeqIO.to_dict(GFF.parse(self._problem_seq_region_file))['1'].annotations
+        # with no contig name the region is expected as a bare 2-tuple
+        assert anns['gff-version'] == ['3']
+        assert anns['sequence-region'] == [(0, 2482535)]
+
+
+class OutputTest(unittest.TestCase):
+    """Tests to write SeqFeatures to GFF3 output format.
+    """
+
+    def setUp(self):
+        self._test_dir = os.path.join(os.path.dirname(__file__), "GFF")
+        self._test_seq_file = os.path.join(self._test_dir, "c_elegans_WS199_dna_shortened.fa")
+        self._test_gff_file = os.path.join(self._test_dir, "c_elegans_WS199_shortened_gff.txt")
+        self._test_gff_ann_file = os.path.join(self._test_dir, "c_elegans_WS199_ann_gff.txt")
+        self._wormbase_file = os.path.join(self._test_dir, "wormbase_gff2.txt")
+
+    def t_gff3_to_gff3(self):
+        """Read in and write out GFF3 without any loss of information.
+        """
+        recs = SeqIO.to_dict(GFF.parse(self._test_gff_file))
+        out_handle = StringIO()
+        GFF.write(recs.values(), out_handle)
+        wrote_handle = StringIO(out_handle.getvalue())
+        recs_two = SeqIO.to_dict(GFF.parse(wrote_handle))
+
+        orig_rec = list(recs.values())[0]
+        re_rec = recs_two[orig_rec.id]  # the re-parsed copy, not the original record again
+        assert len(orig_rec.features) == len(re_rec.features)
+        for i, orig_f in enumerate(orig_rec.features):
+            assert str(orig_f) == str(re_rec.features[i])
+
+    def t_gff2_to_gff3(self):
+        """Read in GFF2 and write out as GFF3.
+        """
+        recs = SeqIO.to_dict(GFF.parse(self._wormbase_file))
+        out_handle = StringIO()
+        GFF.write(recs.values(), out_handle)
+        wrote_handle = StringIO(out_handle.getvalue())
+        # check some tricky lines in the GFF2 file
+        checks = 0
+        for line in wrote_handle:
+            if line.find("Interpolated_map_position") >= 0:
+                checks += 1
+                assert line.find("RFLP=No") > 0
+            if line.find("Gene=WBGene00000138") > 0:
+                checks += 1
+                assert line.find("ID=B0019.1") > 0
+            if line.find("translated_nucleotide_match\t12762127") > 0:
+                checks += 1
+                assert line.find("Note=MSP:FADFSPLDVSDVNFATDDLAK") > 0
+        assert checks == 3, "Missing check line"
+
+    def t_write_from_recs(self):
+        """Write out GFF3 from SeqRecord inputs.
+        """
+        seq = Seq("GATCGATCGATCGATCGATC")
+        rec = SeqRecord(seq, "ID1")
+        qualifiers = {"source": "prediction", "score": 10.0, "other": ["Some", "annotations"], "ID": "gene1"}
+        sub_qualifiers = {"source": "prediction"}
+        top_feature = SeqFeature(FeatureLocation(0, 20), type="gene", strand=1, qualifiers=qualifiers)
+        top_feature.sub_features = [
+            SeqFeature(FeatureLocation(0, 5), type="exon", strand=1, qualifiers=sub_qualifiers),
+            SeqFeature(FeatureLocation(15, 20), type="exon", strand=1, qualifiers=sub_qualifiers)
+        ]
+        rec.features = [top_feature]
+        out_handle = StringIO()
+        GFF.write([rec], out_handle)
+        wrote_info = out_handle.getvalue().split("\n")
+        assert wrote_info[0] == "##gff-version 3"
+        assert wrote_info[1] == "##sequence-region ID1 1 20"
+        print(wrote_info[2].split("\t"))
+        assert wrote_info[2].split("\t") == [
+            'ID1', 'prediction', 'gene', '1', '20', '10.0', '+', '.', 'ID=gene1;other=Some,annotations'
+        ]
+        assert wrote_info[3].split("\t") == ['ID1', 'prediction', 'exon', '1', '5', '.', '+', '.', 'Parent=gene1']
+
+    def t_write_fasta(self):
+        """Include FASTA records in GFF output.
+        """
+        seq = Seq("GATCGATCGATCGATCGATC")
+        rec = SeqRecord(seq, "ID1")
+        qualifiers = {"source": "prediction", "score": 10.0, "other": ["Some", "annotations"], "ID": "gene1"}
+        rec.features = [SeqFeature(FeatureLocation(0, 20), type="gene", strand=1, qualifiers=qualifiers)]
+        out_handle = StringIO()
+        GFF.write([rec], out_handle, include_fasta=True)
+        wrote_info = out_handle.getvalue().split("\n")
+        fasta_parts = wrote_info[3:]
+        assert fasta_parts[0] == "##FASTA"
+        assert fasta_parts[1] == ">ID1 <unknown description>"
+        assert fasta_parts[2] == str(seq)
+
+    def t_write_seqrecord(self):
+        """Write a one-record list; the feature line must start with the record ID.
+        """
+        seq = Seq("GATCGATCGATCGATCGATC")
+        rec = SeqRecord(seq, "ID1")
+        qualifiers = {"source": "prediction", "score": 10.0, "other": ["Some", "annotations"], "ID": "gene1"}
+        rec.features = [SeqFeature(FeatureLocation(0, 20), type="gene", strand=1, qualifiers=qualifiers)]
+        out_handle = StringIO()
+        GFF.write([rec], out_handle, include_fasta=True)
+        wrote_info = out_handle.getvalue().split("\n")
+        gff_line = wrote_info[2]
+        assert gff_line.split("\t")[0] == "ID1"
+
+
+def run_tests(argv):
+    """Run every ``t_`` test verbosely; return 0 on success, 1 otherwise (argv is unused)."""
+    runner = unittest.TextTestRunner(sys.stdout, verbosity=2)
+    return 0 if runner.run(testing_suite()).wasSuccessful() else 1
+
+
+def testing_suite():
+    """Collect the ``t_``-prefixed methods of every test case into one suite.
+    """
+    loader = unittest.TestLoader()
+    # test methods in this module start with "t_" instead of the default prefix
+    loader.testMethodPrefix = 't_'
+    cases = (GFF3Test, MapReduceGFFTest, SolidGFFTester, GFF2Tester, DirectivesTest, OutputTest)
+    # for a focused run, narrow `cases` to a single class, e.g. (GFF3Test,)
+    suite = unittest.TestSuite()
+    for case in cases:
+        suite.addTest(loader.loadTestsFromTestCase(case))
+    return suite
+
+
+if __name__ == "__main__":  # executed as a script: run the tests, exit with run_tests' return value
+    sys.exit(run_tests(sys.argv))


=====================================
bcbio_gff.egg-info/PKG-INFO
=====================================
@@ -1,10 +1,10 @@
 Metadata-Version: 1.0
 Name: bcbio-gff
-Version: 0.6.6
+Version: 0.6.7
 Summary: Read and write Generic Feature Format (GFF) with Biopython integration.
 Home-page: https://github.com/chapmanb/bcbb/tree/master/gff
 Author: Brad Chapman
 Author-email: chapmanb at 50mail.com
-License: UNKNOWN
+License: Biopython License
 Description: UNKNOWN
 Platform: UNKNOWN


=====================================
bcbio_gff.egg-info/SOURCES.txt
=====================================
@@ -8,6 +8,29 @@ BCBio/GFF/GFFOutput.py
 BCBio/GFF/GFFParser.py
 BCBio/GFF/__init__.py
 BCBio/GFF/_utils.py
+Scripts/gff/access_gff_index.py
+Scripts/gff/genbank_to_gff.py
+Scripts/gff/gff2_to_gff3.py
+Scripts/gff/gff_to_biosql.py
+Scripts/gff/gff_to_genbank.py
+Tests/test_GFFSeqIOFeatureAdder.py
+Tests/GFF/F3-unique-3.v2.gff
+Tests/GFF/c_elegans_WS199_ann_gff.txt
+Tests/GFF/c_elegans_WS199_dna_shortened.fa
+Tests/GFF/c_elegans_WS199_shortened_gff.txt
+Tests/GFF/ensembl_gtf.txt
+Tests/GFF/glimmer_nokeyval.gff3
+Tests/GFF/hybrid1.gff3
+Tests/GFF/jgi_gff2.txt
+Tests/GFF/mouse_extra_comma.gff3
+Tests/GFF/ncbi_gff3.txt
+Tests/GFF/problem_sequence_region.gff3
+Tests/GFF/spaces.gff3
+Tests/GFF/trans_splicing.gff3
+Tests/GFF/transcripts.gff3
+Tests/GFF/unescaped-semicolon.gff3
+Tests/GFF/wormbase_gff2.txt
+Tests/GFF/wormbase_gff2_alt.txt
 bcbio_gff.egg-info/PKG-INFO
 bcbio_gff.egg-info/SOURCES.txt
 bcbio_gff.egg-info/dependency_links.txt


=====================================
bcbio_gff.egg-info/requires.txt
=====================================
@@ -1 +1,2 @@
 six
+biopython


=====================================
setup.py
=====================================
@@ -14,8 +14,9 @@ setup(name="bcbio-gff",
       version=__version__,
       author="Brad Chapman",
       author_email="chapmanb at 50mail.com",
+      license="Biopython License",
       description="Read and write Generic Feature Format (GFF) with Biopython integration.",
       url="https://github.com/chapmanb/bcbb/tree/master/gff",
       packages=find_packages(),
-      install_requires=["six"]
+      install_requires=["six", "biopython"]
       )



View it on GitLab: https://salsa.debian.org/med-team/python-bcbio-gff/-/commit/0195e22774cb530a026fc57fdfe9096c350997d6

-- 
View it on GitLab: https://salsa.debian.org/med-team/python-bcbio-gff/-/commit/0195e22774cb530a026fc57fdfe9096c350997d6
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20211009/9a04fab0/attachment-0001.htm>


More information about the debian-med-commit mailing list