[med-svn] [Git][med-team/python-bcbio-gff][master] 3 commits: New upstream version 0.6.8

Nilesh Patra (@nilesh) gitlab at salsa.debian.org
Tue Nov 9 19:06:20 GMT 2021



Nilesh Patra pushed to branch master at Debian Med / python-bcbio-gff


Commits:
858f20e3 by Nilesh Patra at 2021-11-10T00:27:03+05:30
New upstream version 0.6.8
- - - - -
18ee330c by Nilesh Patra at 2021-11-10T00:27:04+05:30
Update upstream source from tag 'upstream/0.6.8'

Update to upstream version '0.6.8'
with Debian dir fcd1e1735185d0fdbd55698c219f4309a57cf92b
- - - - -
a0c8f7c5 by Nilesh Patra at 2021-11-10T00:27:39+05:30
Upload to unstable

- - - - -


9 changed files:

- BCBio/GFF/GFFParser.py
- BCBio/GFF/__init__.py
- PKG-INFO
- + Tests/GFF/hybrid2.fa
- + Tests/GFF/hybrid2.gff3
- Tests/test_GFFSeqIOFeatureAdder.py
- bcbio_gff.egg-info/PKG-INFO
- bcbio_gff.egg-info/SOURCES.txt
- debian/changelog


Changes:

=====================================
BCBio/GFF/GFFParser.py
=====================================
@@ -36,10 +36,8 @@ from Bio.SeqRecord import SeqRecord
 from Bio import SeqFeature
 from Bio import SeqIO
 from Bio import BiopythonDeprecationWarning
-
 warnings.simplefilter("ignore", BiopythonDeprecationWarning)
 
-
 def _gff_line_map(line, params):
     """Map part of Map-Reduce; parses a line of GFF into a dictionary.
 
@@ -50,7 +48,6 @@ def _gff_line_map(line, params):
         - determines the type of attribute (flat, parent, child or annotation)
         - generates a dictionary of GFF info which can be serialized as JSON
     """
-
     def _merge_keyvals(parts):
         """Merge key-values escaped by quotes that are improperly split at semicolons.
         """
@@ -67,7 +64,6 @@ def _gff_line_map(line, params):
         return out
 
     gff3_kw_pat = re.compile("\w+=")
-
     def _split_keyvals(keyval_str):
         """Split key-value pairs in a GFF2, GTF and GFF3 compatible way.
 
@@ -157,14 +153,15 @@ def _gff_line_map(line, params):
                         gff_parts["id"] = gff_parts["quals"][flat_name][0]
                         gff_parts["quals"]["ID"] = [gff_parts["id"]]
                 # children types
-                elif gff_parts["type"] in ["intron", "exon", "three_prime_UTR", "coding_exon", "five_prime_UTR", "CDS",
-                                           "stop_codon", "start_codon"]:
+                elif gff_parts["type"] in ["intron", "exon", "three_prime_UTR",
+                        "coding_exon", "five_prime_UTR", "CDS", "stop_codon",
+                        "start_codon"]:
                     gff_parts["quals"]["Parent"] = gff_parts["quals"][flat_name]
                 break
 
         return gff_parts
 
-    strand_map = {'+': 1, '-': -1, '?': None, None: None}
+    strand_map = {'+' : 1, '-' : -1, '?' : None, None: None}
     line = line.strip()
     if line[:2] == "##":
         return [('directive', line[2:])]
@@ -173,7 +170,8 @@ def _gff_line_map(line, params):
         should_do = True
         if params.limit_info:
             for limit_name, limit_values in params.limit_info.items():
-                cur_id = tuple([parts[i] for i in params.filter_info[limit_name]])
+                cur_id = tuple([parts[i] for i in 
+                    params.filter_info[limit_name]])
                 if cur_id not in limit_values:
                     should_do = False
                     break
@@ -204,7 +202,8 @@ def _gff_line_map(line, params):
             gff_info['rec_id'] = gff_parts[0]
             # if we are describing a location, then we are a feature
             if gff_parts[3] and gff_parts[4]:
-                gff_info['location'] = [int(gff_parts[3]) - 1, int(gff_parts[4])]
+                gff_info['location'] = [int(gff_parts[3]) - 1,
+                        int(gff_parts[4])]
                 gff_info['type'] = gff_parts[2]
                 gff_info['id'] = quals.get('ID', [''])[0]
                 gff_info['strand'] = strand_map.get(gff_parts[6], None)
@@ -235,7 +234,6 @@ def _gff_line_map(line, params):
                 return [(final_key, gff_info)]
     return []
 
-
 def _gff_line_reduce(map_results, out, params):
     """Reduce part of Map-Reduce; combines results of parsed features.
     """
@@ -252,7 +250,6 @@ def _gff_line_reduce(map_results, out, params):
             vals = simplejson.dumps(vals)
         out.add(key, vals)
 
-
 class _MultiIDRemapper:
     """Provide an ID remapping for cases where a parent has a non-unique ID.
 
@@ -260,7 +257,6 @@ class _MultiIDRemapper:
     by using the unique sequence region to assign children to the right
     parent.
     """
-
     def __init__(self, base_id, all_parents):
         self._base_id = base_id
         self._parents = all_parents
@@ -277,11 +273,9 @@ class _MultiIDRemapper:
         # if we haven't found a location match but parents are unambiguous, return that
         if len(self._parents) == 1:
             return self._base_id
-        raise ValueError(
-            "Did not find remapped ID location: %s, %s, %s" %
-            (self._base_id, [p['location'] for p in self._parents], feature_dict['location'])
-        )
-
+        raise ValueError("Did not find remapped ID location: %s, %s, %s" % (
+                self._base_id, [p['location'] for p in self._parents],
+                feature_dict['location']))
 
 class _AbstractMapReduceGFF:
     """Base class providing general GFF parsing for local and remote classes.
@@ -291,7 +285,6 @@ class _AbstractMapReduceGFF:
     the _gff_process function, which returns a dictionary of SeqRecord
     information.
     """
-
     def __init__(self, create_missing=True):
         """Initialize GFF parser 
 
@@ -320,7 +313,8 @@ class _AbstractMapReduceGFF:
         for rec in self.parse_in_parts(gff_files, base_dict, limit_info):
             yield rec
 
-    def parse_in_parts(self, gff_files, base_dict=None, limit_info=None, target_lines=None):
+    def parse_in_parts(self, gff_files, base_dict=None, limit_info=None,
+            target_lines=None):
         """Parse a region of a GFF file specified, returning info as generated.
 
         target_lines -- The number of lines in the file which should be used
@@ -371,7 +365,8 @@ class _AbstractMapReduceGFF:
         base = self._add_annotations(base, results.get('annotation', []))
         for feature in results.get('feature', []):
             (_, base) = self._add_toplevel_feature(base, feature)
-        base = self._add_parent_child_features(base, results.get('parent', []), results.get('child', []))
+        base = self._add_parent_child_features(base, results.get('parent', []),
+                results.get('child', []))
         base = self._add_seqs(base, results.get('fasta', []))
         base = self._add_directives(base, results.get('directive', []))
         return base
@@ -402,12 +397,35 @@ class _AbstractMapReduceGFF:
                 self._add_ann_to_rec(rec, key, vals)
         return base
 
+    def _get_matching_record_id(self, base, find_id):
+        """Find a matching base record with the test identifier, handling tricky cases.
+
+        NCBI IDs https://en.wikipedia.org/wiki/FASTA_format#NCBI_identifiers
+        """
+        # Straight matches for identifiers
+        if find_id in base:
+            return find_id
+        # NCBI style IDs in find_id
+        elif find_id.find("|") > 0:
+            for test_id in [x.strip() for x in find_id.split("|")[1:]]:
+                if test_id and test_id in base:
+                    return test_id
+        # NCBI style IDs in base IDs
+        else:
+            for base_id in base.keys():
+                if base_id.find("|") > 0:
+                    for test_id in [x.strip() for x in base_id.split("|")[1:]]:
+                        if test_id and test_id == find_id:
+                            return base_id
+        return None
+
     def _add_seqs(self, base, recs):
         """Add sequence information contained in the GFF3 to records.
         """
         for rec in recs:
-            if rec.id in base:
-                base[rec.id].seq = rec.seq
+            match_id = self._get_matching_record_id(base, rec.id)
+            if match_id:
+                base[match_id].seq = rec.seq
             else:
                 base[rec.id] = rec
         return base
@@ -424,15 +442,18 @@ class _AbstractMapReduceGFF:
                 if pid in multi_remap:
                     pid = multi_remap[pid].remap_id(child_dict)
                     child_feature.qualifiers['Parent'][pindex] = pid
-                children_prep[pid].append((child_dict['rec_id'], child_feature))
+                children_prep[pid].append((child_dict['rec_id'],
+                                           child_feature))
         children = dict(children_prep)
         # add children to parents that exist
         for cur_parent_dict in parents:
             cur_id = cur_parent_dict['id']
             if cur_id in multi_remap:
-                cur_parent_dict['id'] = multi_remap[cur_id].remap_id(cur_parent_dict)
+                cur_parent_dict['id'] = multi_remap[cur_id].remap_id(
+                        cur_parent_dict)
             cur_parent, base = self._add_toplevel_feature(base, cur_parent_dict)
-            cur_parent, children = self._add_children_to_parent(cur_parent, children)
+            cur_parent, children = self._add_children_to_parent(cur_parent,
+                                                                children)
         # create parents for children without them (GFF2 or split/bad files)
         while len(children) > 0:
             parent_id, cur_children = next(itertools.islice(children.items(), 1))
@@ -440,12 +461,15 @@ class _AbstractMapReduceGFF:
             if len(cur_children) == 1:
                 rec_id, child = cur_children[0]
                 loc = (child.location.nofuzzy_start, child.location.nofuzzy_end)
-                rec, base = self._get_rec(base, dict(rec_id=rec_id, location=loc))
+                rec, base = self._get_rec(base,
+                                          dict(rec_id=rec_id, location=loc))
                 rec.features.append(child)
                 del children[parent_id]
             else:
-                cur_parent, base = self._add_missing_parent(base, parent_id, cur_children)
-                cur_parent, children = self._add_children_to_parent(cur_parent, children)
+                cur_parent, base = self._add_missing_parent(base, parent_id,
+                                                            cur_children)
+                cur_parent, children = self._add_children_to_parent(cur_parent,
+                                                                    children)
         return base
 
     def _identify_dup_ids(self, parents):
@@ -458,7 +482,8 @@ class _AbstractMapReduceGFF:
         multi_ids = collections.defaultdict(list)
         for parent in parents:
             multi_ids[parent['id']].append(parent)
-        multi_ids = [(mid, ps) for (mid, ps) in multi_ids.items() if len(parents) > 1]
+        multi_ids = [(mid, ps) for (mid, ps) in multi_ids.items()
+                     if len(parents) > 1]
         multi_remap = dict()
         for mid, parents in multi_ids:
             multi_remap[mid] = _MultiIDRemapper(mid, parents)
@@ -507,19 +532,20 @@ class _AbstractMapReduceGFF:
         """Retrieve a record to add features to.
         """
         max_loc = info_dict.get('location', (0, 1))[1]
-        try:
-            cur_rec = base[info_dict['rec_id']]
+        match_id = self._get_matching_record_id(base, info_dict['rec_id'])
+        if match_id:
+            cur_rec = base[match_id]
             # update generated unknown sequences with the expected maximum length
             if isinstance(cur_rec.seq, UnknownSeq):
                 cur_rec.seq._length = max([max_loc, cur_rec.seq._length])
             return cur_rec, base
-        except KeyError:
-            if self._create_missing:
-                new_rec = SeqRecord(UnknownSeq(max_loc), info_dict['rec_id'])
-                base[info_dict['rec_id']] = new_rec
-                return new_rec, base
-            else:
-                raise
+        elif self._create_missing:
+            new_rec = SeqRecord(UnknownSeq(max_loc), info_dict['rec_id'])
+            base[info_dict['rec_id']] = new_rec
+            return new_rec, base
+        else:
+            raise KeyError("Did not find matching record in %s for %s" %
+                           (base.keys(), info_dict))
 
     def _add_missing_parent(self, base, parent_id, cur_children):
         """Add a new feature that is missing from the GFF file.
@@ -528,15 +554,13 @@ class _AbstractMapReduceGFF:
         child_strands = list(set(c[1].strand for c in cur_children))
         inferred_strand = child_strands[0] if len(child_strands) == 1 else None
         assert len(base_rec_id) > 0
-        feature_dict = dict(
-            id=parent_id,
-            strand=inferred_strand,
-            type="inferred_parent",
-            quals=dict(ID=[parent_id]),
-            rec_id=base_rec_id[0]
-        )
-        coords = [(c.location.nofuzzy_start, c.location.nofuzzy_end) for r, c in cur_children]
-        feature_dict["location"] = (min([c[0] for c in coords]), max([c[1] for c in coords]))
+        feature_dict = dict(id=parent_id, strand=inferred_strand,
+                            type="inferred_parent", quals=dict(ID=[parent_id]),
+                            rec_id=base_rec_id[0])
+        coords = [(c.location.nofuzzy_start, c.location.nofuzzy_end)
+                  for r, c in cur_children]
+        feature_dict["location"] = (min([c[0] for c in coords]),
+                                    max([c[1] for c in coords]))
         return self._add_toplevel_feature(base, feature_dict)
 
     def _add_toplevel_feature(self, base, feature_dict):
@@ -551,9 +575,8 @@ class _AbstractMapReduceGFF:
         """Retrieve a Biopython feature from our dictionary representation.
         """
         location = SeqFeature.FeatureLocation(*feature_dict['location'])
-        new_feature = SeqFeature.SeqFeature(
-            location, feature_dict['type'], id=feature_dict['id'], strand=feature_dict['strand']
-        )
+        new_feature = SeqFeature.SeqFeature(location, feature_dict['type'],
+                id=feature_dict['id'], strand=feature_dict['strand'])
         # Support for Biopython 1.68 and above, which removed sub_features
         if not hasattr(new_feature, "sub_features"):
             new_feature.sub_features = []
@@ -565,11 +588,9 @@ class _AbstractMapReduceGFF:
         """
         return list(SeqIO.parse(in_handle, "fasta"))
 
-
 class _GFFParserLocalOut:
     """Provide a collector for local GFF MapReduce file parsing.
     """
-
     def __init__(self, smart_breaks=False):
         self._items = dict()
         self._smart_breaks = smart_breaks
@@ -627,15 +648,13 @@ class _GFFParserLocalOut:
         self._last_parent = None
         return self._items
 
-
 class GFFParser(_AbstractMapReduceGFF):
     """Local GFF parser providing standardized parsing of GFF3 and GFF2 files.
     """
-
     def __init__(self, line_adjust_fn=None, create_missing=True):
         _AbstractMapReduceGFF.__init__(self, create_missing=create_missing)
         self._line_adjust_fn = line_adjust_fn
-
+    
     def _gff_process(self, gff_files, limit_info, target_lines):
         """Process GFF addition without any parallelization.
 
@@ -665,38 +684,39 @@ class GFFParser(_AbstractMapReduceGFF):
             if need_close:
                 in_handle.close()
 
-    def _lines_to_out_info(self, line_iter, limit_info=None, target_lines=None):
+    def _lines_to_out_info(self, line_iter, limit_info=None,
+            target_lines=None):
         """Generate SeqRecord and SeqFeatures from GFF file lines.
         """
         params = self._examiner._get_local_params(limit_info)
-        out_info = _GFFParserLocalOut((target_lines is not None and target_lines > 1))
+        out_info = _GFFParserLocalOut((target_lines is not None and
+                target_lines > 1))
         found_seqs = False
         for line in line_iter:
             results = self._map_fn(line, params)
             if self._line_adjust_fn and results:
                 if results[0][0] not in ['directive']:
-                    results = [(results[0][0], self._line_adjust_fn(results[0][1]))]
+                    results = [(results[0][0],
+                        self._line_adjust_fn(results[0][1]))]
             self._reduce_fn(results, out_info, params)
-            if (target_lines and out_info.num_lines >= target_lines and out_info.can_break):
+            if (target_lines and out_info.num_lines >= target_lines and
+                    out_info.can_break):
                 yield out_info.get_results()
-                out_info = _GFFParserLocalOut((target_lines is not None and target_lines > 1))
-            if (results and results[0][0] == 'directive' and results[0][1] == 'FASTA'):
+                out_info = _GFFParserLocalOut((target_lines is not None and
+                        target_lines > 1))
+            if (results and results[0][0] == 'directive' and 
+                    results[0][1] == 'FASTA'):
                 found_seqs = True
                 break
 
         class FakeHandle:
-
             def __init__(self, line_iter):
                 self._iter = line_iter
-
             def __iter__(self):
                 return self
-
             def __next__(self):
                 return next(self._iter)
-
             next = __next__
-
             def read(self, size=-1):
                 if size < 0:
                     return "".join(l for l in self._iter)
@@ -704,7 +724,6 @@ class GFFParser(_AbstractMapReduceGFF):
                     return ""  # Used by Biopython to sniff unicode vs bytes
                 else:
                     raise NotImplementedError
-
             def readline(self):
                 try:
                     return next(self._iter)
@@ -717,11 +736,9 @@ class GFFParser(_AbstractMapReduceGFF):
         if out_info.has_items():
             yield out_info.get_results()
 
-
 class DiscoGFFParser(_AbstractMapReduceGFF):
     """GFF Parser with parallelization through Disco (http://discoproject.org.
     """
-
     def __init__(self, disco_host, create_missing=True):
         """Initialize parser.
         
@@ -738,36 +755,32 @@ class DiscoGFFParser(_AbstractMapReduceGFF):
         # make these imports local; only need them when using disco
         import simplejson
         import disco
-        # absolute path names unless they are special disco files
+        # absolute path names unless they are special disco files 
         full_files = []
         for f in gff_files:
             if f.split(":")[0] != "disco":
                 full_files.append(os.path.abspath(f))
             else:
                 full_files.append(f)
-        results = disco.job(
-            self._disco_host,
-            name="gff_reader",
-            input=full_files,
-            params=disco.Params(limit_info=limit_info, jsonify=True, filter_info=self._examiner._filter_info),
-            required_modules=["simplejson", "collections", "re"],
-            map=self._map_fn,
-            reduce=self._reduce_fn
-        )
+        results = disco.job(self._disco_host, name="gff_reader",
+                input=full_files,
+                params=disco.Params(limit_info=limit_info, jsonify=True,
+                    filter_info=self._examiner._filter_info),
+                required_modules=["simplejson", "collections", "re"],
+                map=self._map_fn, reduce=self._reduce_fn)
         processed = dict()
         for out_key, out_val in disco.result_iterator(results):
             processed[out_key] = simplejson.loads(out_val)
         yield processed
 
-
 def parse(gff_files, base_dict=None, limit_info=None, target_lines=None):
     """High level interface to parse GFF files into SeqRecords and SeqFeatures.
     """
     parser = GFFParser()
-    for rec in parser.parse_in_parts(gff_files, base_dict, limit_info, target_lines):
+    for rec in parser.parse_in_parts(gff_files, base_dict, limit_info,
+            target_lines):
         yield rec
 
-
 def parse_simple(gff_files, limit_info=None):
     """Parse GFF files as line by line dictionary of parts.
     """
@@ -784,11 +797,9 @@ def parse_simple(gff_files, limit_info=None):
         else:
             assert "directive" in rec
 
-
 def _file_or_handle(fn):
     """Decorator to handle either an input handle or a file.
     """
-
     def _file_or_handle_inside(*args, **kwargs):
         in_file = args[1]
         if hasattr(in_file, "read"):
@@ -804,10 +815,8 @@ def _file_or_handle(fn):
         if need_close:
             in_handle.close()
         return out
-
     return _file_or_handle_inside
 
-
 class GFFExaminer:
     """Provide high level details about a GFF file to refine parsing.
 
@@ -817,22 +826,19 @@ class GFFExaminer:
     information you need. This class provides high level summary details to
     help in learning.
     """
-
     def __init__(self):
-        self._filter_info = dict(gff_id=[0], gff_source_type=[1, 2], gff_source=[1], gff_type=[2])
-
+        self._filter_info = dict(gff_id = [0], gff_source_type = [1, 2],
+                gff_source = [1], gff_type = [2])
+    
     def _get_local_params(self, limit_info=None):
-
         class _LocalParams:
-
             def __init__(self):
                 self.jsonify = False
-
         params = _LocalParams()
         params.limit_info = limit_info
         params.filter_info = self._filter_info
         return params
-
+    
     @_file_or_handle
     def available_limits(self, gff_handle):
         """Return dictionary information on possible limits for this file.
@@ -889,12 +895,16 @@ class GFFExaminer:
             if line.startswith("##FASTA"):
                 break
             if line.strip() and not line.startswith("#"):
-                line_type, line_info = _gff_line_map(line, self._get_local_params())[0]
-                if (line_type == 'parent' or (line_type == 'child' and line_info['id'])):
-                    parent_sts[line_info['id']] = (line_info['quals'].get('source', [""])[0], line_info['type'])
+                line_type, line_info = _gff_line_map(line,
+                        self._get_local_params())[0]
+                if (line_type == 'parent' or (line_type == 'child' and
+                        line_info['id'])):
+                    parent_sts[line_info['id']] = (
+                            line_info['quals'].get('source', [""])[0], line_info['type'])
                 if line_type == 'child':
                     for parent_id in line_info['quals']['Parent']:
-                        child_sts[parent_id].append((line_info['quals'].get('source', [""])[0], line_info['type']))
+                        child_sts[parent_id].append((
+                            line_info['quals'].get('source', [""])[0], line_info['type']))
         #print parent_sts, child_sts
         # generate a dictionary of the unique final type relationships
         pc_map = collections.defaultdict(list)
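
The substantive change in this release is the new _get_matching_record_id helper above: record lookups now also try NCBI-style pipe-delimited identifiers (https://en.wikipedia.org/wiki/FASTA_format#NCBI_identifiers), so a FASTA header such as lcl|chr17 can match a GFF sequence ID of chr17 and vice versa; both _add_seqs and _get_rec route lookups through it. A minimal standalone sketch of the same matching logic (the function name and example dicts are illustrative, not part of the package):

    def match_record_id(base, find_id):
        """Match find_id against base keys, tolerating NCBI-style 'db|id' forms."""
        # 1. straight match
        if find_id in base:
            return find_id
        # 2. find_id itself is NCBI-style: try its pipe-separated components
        if find_id.find("|") > 0:
            for test_id in (x.strip() for x in find_id.split("|")[1:]):
                if test_id and test_id in base:
                    return test_id
        # 3. base keys are NCBI-style: try their components against find_id
        else:
            for base_id in base:
                if base_id.find("|") > 0:
                    for test_id in (x.strip() for x in base_id.split("|")[1:]):
                        if test_id and test_id == find_id:
                            return base_id
        return None

    assert match_record_id({"lcl|chr17": None}, "chr17") == "lcl|chr17"
    assert match_record_id({"chr17": None}, "lcl|chr17") == "chr17"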


=====================================
BCBio/GFF/__init__.py
=====================================
@@ -3,4 +3,4 @@
 from BCBio.GFF.GFFParser import GFFParser, DiscoGFFParser, GFFExaminer, parse, parse_simple
 from BCBio.GFF.GFFOutput import GFF3Writer, write
 
-__version__ = "0.6.7"
+__version__ = "0.6.8"


=====================================
PKG-INFO
=====================================
@@ -1,6 +1,6 @@
 Metadata-Version: 1.0
 Name: bcbio-gff
-Version: 0.6.7
+Version: 0.6.8
 Summary: Read and write Generic Feature Format (GFF) with Biopython integration.
 Home-page: https://github.com/chapmanb/bcbb/tree/master/gff
 Author: Brad Chapman


=====================================
Tests/GFF/hybrid2.fa
=====================================
@@ -0,0 +1,3 @@
+>lcl|chr17
+GATTACA
+GATTACA


=====================================
Tests/GFF/hybrid2.gff3
=====================================
@@ -0,0 +1,17 @@
+##gff-version 3
+##sequence-region foo 1 100
+##feature-ontology bar
+##attribute-ontology baz
+##source-ontology boo
+##sequence-region chr17 62467934 62469545 
+chr17	UCSC	mRNA	62467934	62469545	.	-	.	ID=A00469;Dbxref=AFFX-U133:205840_x_at,Locuslink:2688,Genbank-mRNA:A00469,Swissprot:P01241,PFAM:PF00103,AFFX-U95:1332_f_at,Swissprot:SOMA_HUMAN;Note=growth%20hormone%201;Alias=GH1
+chr17	UCSC	CDS	62468039	62468236	.	-	1	Parent=A00469
+chr17	UCSC	CDS	62468490	62468654	.	-	2	Parent=A00469
+chr17	UCSC	CDS	62468747	62468866	.	-	1	Parent=A00469
+chr17	UCSC	CDS	62469076	62469236	.	-	1	Parent=A00469
+chr17	UCSC	CDS	62469497	62469506	.	-	0	Parent=A00469
+###
+##FASTA
+>lcl|chr17
+GATTACA
+GATTACA
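
These files give the parser an NCBI-style FASTA ID (lcl|chr17) alongside GFF3 feature lines that use the plain ID chr17. A minimal usage sketch mirroring the new tests further below (paths assume the Tests/GFF layout added in this commit and a run from the source tree):

    from Bio import SeqIO
    from BCBio import GFF

    # External FASTA: the record dict is keyed by the NCBI-style "lcl|chr17"
    with open("Tests/GFF/hybrid2.fa") as seq_handle:
        seq_dict = SeqIO.to_dict(SeqIO.parse(seq_handle, "fasta"))

    # Feature lines in the GFF3 use the plain "chr17"; with the new ID
    # matching they attach to the "lcl|chr17" record from seq_dict.
    recs = SeqIO.to_dict(GFF.parse("Tests/GFF/hybrid2.gff3", seq_dict))
    assert str(recs["lcl|chr17"].seq) == "GATTACAGATTACA"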


=====================================
Tests/test_GFFSeqIOFeatureAdder.py
=====================================
@@ -519,6 +519,8 @@ class DirectivesTest(unittest.TestCase):
     def setUp(self):
         self._test_dir = os.path.join(os.path.dirname(__file__), "GFF")
         self._gff_file = os.path.join(self._test_dir, "hybrid1.gff3")
+        self._ncbi_gff = os.path.join(self._test_dir, "hybrid2.gff3")
+        self._ncbi_fa = os.path.join(self._test_dir, "hybrid2.fa")
         self._problem_seq_region_file = os.path.join(self._test_dir, "problem_sequence_region.gff3")
 
     def t_basic_directives(self):
@@ -540,6 +542,24 @@ class DirectivesTest(unittest.TestCase):
         test_rec = recs['chr17']
         assert str(test_rec.seq) == "GATTACAGATTACA"
 
+    def t_fasta_directive_w_ncbi(self):
+        """Parse FASTA sequence information contained in a GFF3 file with NCBI style IDs.
+        """
+        with open(self._ncbi_fa) as seq_handle:
+            seq_dict = SeqIO.to_dict(SeqIO.parse(seq_handle, "fasta"))
+        recs = SeqIO.to_dict(GFF.parse(self._ncbi_gff, seq_dict))
+        assert len(recs) == 1
+        test_rec = recs['lcl|chr17']
+        assert str(test_rec.seq) == "GATTACAGATTACA"
+
+    def t_fasta_directive_w_ncbi_fa(self):
+        """Parse FASTA sequence information contained in a separate file with NCBI style IDs.
+        """
+        recs = SeqIO.to_dict(GFF.parse(self._ncbi_gff))
+        assert len(recs) == 1
+        test_rec = recs['chr17']
+        assert str(test_rec.seq) == "GATTACAGATTACA"
+
     def t_examiner_with_fasta(self):
         """Perform high level examination of files with FASTA directives.
         """


=====================================
bcbio_gff.egg-info/PKG-INFO
=====================================
@@ -1,6 +1,6 @@
 Metadata-Version: 1.0
 Name: bcbio-gff
-Version: 0.6.7
+Version: 0.6.8
 Summary: Read and write Generic Feature Format (GFF) with Biopython integration.
 Home-page: https://github.com/chapmanb/bcbb/tree/master/gff
 Author: Brad Chapman


=====================================
bcbio_gff.egg-info/SOURCES.txt
=====================================
@@ -21,6 +21,8 @@ Tests/GFF/c_elegans_WS199_shortened_gff.txt
 Tests/GFF/ensembl_gtf.txt
 Tests/GFF/glimmer_nokeyval.gff3
 Tests/GFF/hybrid1.gff3
+Tests/GFF/hybrid2.fa
+Tests/GFF/hybrid2.gff3
 Tests/GFF/jgi_gff2.txt
 Tests/GFF/mouse_extra_comma.gff3
 Tests/GFF/ncbi_gff3.txt


=====================================
debian/changelog
=====================================
@@ -1,3 +1,10 @@
+python-bcbio-gff (0.6.8-1) unstable; urgency=medium
+
+  * Team Upload.
+  * New upstream version 0.6.8
+
+ -- Nilesh Patra <nilesh at debian.org>  Wed, 10 Nov 2021 00:27:07 +0530
+
 python-bcbio-gff (0.6.7-1) unstable; urgency=medium
 
   * New upstream version



View it on GitLab: https://salsa.debian.org/med-team/python-bcbio-gff/-/compare/7b0b85cfc0ad17015d0fd9c5a5ef186e6be6d2bf...a0c8f7c5b3bfaa3b3a269bb546b80e7ac84166a8
