[med-svn] [Git][med-team/python-gffutils][master] 3 commits: New upstream version 0.11.1

Nilesh Patra (@nilesh) gitlab at salsa.debian.org
Thu Oct 13 17:46:43 BST 2022

Nilesh Patra pushed to branch master at Debian Med / python-gffutils

98709811 by Nilesh Patra at 2022-10-13T22:09:53+05:30
New upstream version 0.11.1
- - - - -
1d5cabd8 by Nilesh Patra at 2022-10-13T22:09:58+05:30
Update upstream source from tag 'upstream/0.11.1'

Update to upstream version '0.11.1'
with Debian dir 92e09dc3702697f8a181af7c350483c648492b0c
- - - - -
67b8df0b by Nilesh Patra at 2022-10-13T22:10:16+05:30
Upload to unstable

- - - - -

11 changed files:

- debian/changelog
- gffutils.egg-info/PKG-INFO
- gffutils.egg-info/SOURCES.txt
- gffutils/create.py
- gffutils/interface.py
- + gffutils/test/data/a.py
- + gffutils/test/data/dm6-chr2L.fa.fai
- + gffutils/test/data/issue_197.gff
- gffutils/test/test_issues.py
- gffutils/version.py


@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: gffutils
-Version: 0.11.0
+Version: 0.11.1
 Summary: Work with GFF and GTF files in a flexible database framework
 Home-page: https://github.com/daler/gffutils
 Author: Ryan Dale

@@ -1,3 +1,10 @@
+python-gffutils (0.11.1-1) unstable; urgency=medium
+  * Team upload.
+  * New upstream version 0.11.1
+ -- Nilesh Patra <nilesh at debian.org>  Thu, 13 Oct 2022 22:10:06 +0530
 python-gffutils (0.11.0-2) UNRELEASED; urgency=medium
   * Team upload.

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: gffutils
-Version: 0.11.0
+Version: 0.11.1
 Summary: Work with GFF and GTF files in a flexible database framework
 Home-page: https://github.com/daler/gffutils
 Author: Ryan Dale

@@ -49,10 +49,12 @@ gffutils/test/data/FBgn0031208.gtf
@@ -69,6 +71,7 @@ gffutils/test/data/intro_docs_example.gff

@@ -67,7 +67,7 @@ class _DBCreator(object):
-        text_factory=sqlite3.OptimizedUnicode,
+        text_factory=str,
@@ -1111,7 +1111,7 @@ def create_db(
-    text_factory=sqlite3.OptimizedUnicode,
+    text_factory=str,
@@ -1321,11 +1321,7 @@ def create_db(
         available, since these fields need to be integers.
     text_factory : callable
-        Text factory to use for the sqlite3 database.  See
-        https://docs.python.org/2/library/\
-                sqlite3.html#sqlite3.Connection.text_factory
-        for details. The default sqlite3.OptimizedUnicode will return Unicode
-        objects only for non-ASCII data, and bytestrings otherwise.
+        Text factory to use for the sqlite3 database.
     pragmas : dict
         Dictionary of pragmas used when creating the sqlite3 database. See

@@ -819,7 +819,8 @@ class FeatureDB(object):
         Providing N features will return N - 1 new features.
         This method purposefully does *not* do any merging or sorting of
-        coordinates, so you may want to use :meth:`FeatureDB.merge` first, or
+        coordinates, so nested or overlapping features may not behave as you
+        might expect. You may want to use :meth:`FeatureDB.merge` first, and
         when selecting features use the `order_by` kwarg, e.g.,
         `db.features_of_type('gene', order_by=('seqid', 'start'))`.
@@ -866,43 +867,89 @@ class FeatureDB(object):
         A generator that yields :class:`Feature` objects
+        def _init_interfeature(f):
+            """
+            Used to initialize a new interfeature that is ready to be updated
+            in-place.
+            """
+            keys = ['id', 'seqid', 'source', 'featuretype', 'start', 'end',
+                    'score', 'strand', 'frame', 'attributes', 'bin']
+            d = dict(zip(keys, f.astuple()))
+            d['source'] = 'gffutils_derived'
+            return d
+        def _prep_for_yield(d):
+            """
+            Finalize the interfeature by adjusting coords, recalculating the
+            bin, and creating a feature using self._feature_returner.
+            If start is greater than end (which happens when trying to get
+            interfeatures for overlapping features), then return None.
+            """
+            d['start'] += 1
+            d['end'] -= 1
+            new_bin = bins.bins(d['start'], d['end'], one=True)
+            d['bin'] = new_bin
+            if d['start'] > d['end']:
+                return None
+            return self._feature_returner(**d)
+        # If not provided, use a no-op function instead.
+        if not attribute_func:
+            def attribute_func(a):
+                return a
         for i, f in enumerate(features):
-            # no inter-feature for the first one
+            # First feature: initialize an interfeature and continue to the next.
             if i == 0:
-                interfeature_start = f.stop
+                interfeature = _init_interfeature(f)
+                last_feature = f
+                nfeatures = 1
+                continue
+            # Yield the last interfeature (if we saw at least 2 features) and
+            # start a new interfeature on this chrom.
+            if f.chrom != last_feature.chrom:
+                if nfeatures > 1:
+                    new_feature = _prep_for_yield(interfeature)
+                    if new_feature:
+                        yield new_feature
+                interfeature = _init_interfeature(f)
                 last_feature = f
+                nfeatures = 1
-            interfeature_stop = f.start
+            # Otherwise, we've already seen a feature on this chrom so
+            # this is the second.
+            nfeatures += 1
+            # Adjust the interfeature dict in-place with coords...
+            interfeature['start'] = last_feature.stop
+            interfeature['end'] = f.start
+            # ...featuretype
             if new_featuretype is None:
-                new_featuretype = "inter_%s_%s" % (
+                interfeature['featuretype'] = "inter_%s_%s" % (
-            if last_feature.strand != f.strand:
-                new_strand = "."
-                new_strand = f.strand
-            if last_feature.chrom != f.chrom:
-                # We've moved to a new chromosome.  For example, if we're
-                # getting intergenic regions from all genes, they will be on
-                # different chromosomes. We still assume sorted features, but
-                # don't complain if they're on different chromosomes -- just
-                # move on.
-                last_feature = f
-                continue
-            strand = new_strand
-            chrom = last_feature.chrom
+                interfeature['featuretype'] = new_featuretype
-            # Shrink
-            interfeature_start += 1
-            interfeature_stop -= 1
+            # ...strand
+            if last_feature.strand != f.strand:
+                interfeature['strand'] = '.'
+            else:
+                interfeature['strand'] = f.strand
+            # and attributes
             if merge_attributes:
                 new_attributes = helpers.merge_attributes(
-                    last_feature.attributes, f.attributes,
+                    attribute_func(last_feature.attributes),
+                    attribute_func(f.attributes),
@@ -911,31 +958,14 @@ class FeatureDB(object):
             if update_attributes:
-            new_bin = bins.bins(interfeature_start, interfeature_stop, one=True)
-            _id = None
-            fields = dict(
-                seqid=chrom,
-                source="gffutils_derived",
-                featuretype=new_featuretype,
-                start=interfeature_start,
-                end=interfeature_stop,
-                score=".",
-                strand=strand,
-                frame=".",
-                attributes=new_attributes,
-                bin=new_bin,
-            )
+            interfeature['attributes'] = new_attributes
+            # Ready to yield
+            new_feature = _prep_for_yield(interfeature)
+            if new_feature:
+                yield new_feature
+            nfeatures = 1
-            if dialect is None:
-                # Support for @classmethod -- if calling from the class, then
-                # self.dialect is not defined, so defer to Feature's default
-                # (which will be constants.dialect, or GFF3).
-                try:
-                    dialect = self.dialect
-                except AttributeError:
-                    dialect = None
-            yield self._feature_returner(**fields)
-            interfeature_start = f.stop
             last_feature = f
     def delete(self, features, make_backup=True, **kwargs):

@@ -0,0 +1,22 @@
+import gffutils
+db = gffutils.create_db('issue_197.gff', ':memory:', merge_strategy='error')
+genes = list(db.features_of_type('gene'))
+genes = list(db.merge(genes))
+igss = list( db.interfeatures(genes,new_featuretype='intergenic_space') )
+def transform(f):
+    f['ID'] = [ '-'.join(f.attributes['ID']) ]
+    return f
+for i in igss:
+    print(transform(i))
+db = db.update(igss, transform=transform, merge_strategy='error')
+for i in db.all_features(order_by=('seqid', 'start')):
+    print(i)

@@ -0,0 +1 @@
+chr2L	2450	7	50	51

@@ -0,0 +1,39 @@
+tig00000012	EVM	gene	2181975	2182655	.	+	.	ID=ctg012.gene0754;Name=gene0754
+tig00000012	EVM	mRNA	2181975	2182655	.	+	.	ID=ctg012.mRNA0754;Parent=ctg012.gene0754;Name=mRNA0754
+tig00000012	EVM	exon	2181975	2182655	.	+	.	ID=ctg012.mRNA0754.exon01;Parent=ctg012.mRNA0754
+tig00000012	EVM	CDS	2181975	2182655	.	+	0	ID=ctg012.mRNA0754.CDS01;Parent=ctg012.mRNA0754
+tig00000492	EVM	gene	46225	47235	.	-	.	ID=ctg492.gene0001;Name=gene0001
+tig00000492	EVM	mRNA	46225	47235	.	-	.	ID=ctg492.mRNA0001;Parent=ctg492.gene0001;Name=mRNA0001
+tig00000492	EVM	exon	46225	47235	.	-	.	ID=ctg492.mRNA0001.exon01;Parent=ctg492.mRNA0001
+tig00000492	EVM	CDS	46225	47235	.	-	0	ID=ctg492.mRNA0001.CDS01;Parent=ctg492.mRNA0001
+tig00000492	EVM	gene	47351	48256	.	-	.	ID=ctg492.gene0002;Name=gene0002
+tig00000492	EVM	mRNA	47351	48256	.	-	.	ID=ctg492.mRNA0002;Parent=ctg492.gene0002;Name=mRNA0002
+tig00000492	EVM	exon	47351	48256	.	-	.	ID=ctg492.mRNA0002.exon01;Parent=ctg492.mRNA0002
+tig00000492	EVM	CDS	47351	48256	.	-	0	ID=ctg492.mRNA0002.CDS01;Parent=ctg492.mRNA0002
+tig00000492	EVM	gene	50000	50009	.	-	.	ID=gene0
+# This is a long gene overlapping others. It should not yield an interfeature
+# with the previous gene (since it overlaps) but it also should not prevent
+# subsequent interfeatures. The docstring points out that nested features like
+# this should be merged. When genes are merged, then the next interfeature
+# shouldn't be until 50086 to 50089.
+tig00000492	EVM	gene	50000	50085	.	-	.	ID=gene00
+tig00000492	EVM	gene	50009	50029	.	-	.	ID=gene1
+# (no interfeature here since genes are contiguous)
+tig00000492	EVM	gene	50030	50032	.	-	.	ID=gene2
+# gene3 overlaps with gene2, so should not give interfeature here
+tig00000492	EVM	gene	50030	50049	.	-	.	ID=gene3
+tig00000492	EVM	gene	50055	50070	.	-	.	ID=gene4
+# interfeature created here should be length 1 (50071 to 50071)
+tig00000492	EVM	gene	50072	50075	.	-	.	ID=gene5
+# interfeature should be 50076 to 50089
+tig00000492	EVM	gene	50090	50100	.	-	.	ID=gene6

@@ -92,7 +92,7 @@ def test_issue_107():
     assert [str(i) for i in interfeatures] == [
-        "chr2\tgffutils_derived\tinter_gene_gene\t16\t54\t.\t-\t.\tID=c,d;",
+        "chr2\tgffutils_derived\tinter_gene_gene\t51\t54\t.\t-\t.\tID=c,d;",
@@ -184,9 +184,10 @@ def test_pr_139():
     inter = list(db.interfeatures(exons))
     # previously, the first exon's attributes would show up in subsequent merged features
-    assert exons[0].attributes["Name"][0] not in inter[1].attributes["Name"]
-    assert exons[0].attributes["Name"][0] not in inter[2].attributes["Name"]
-    assert exons[0].attributes["Name"][0] not in inter[3].attributes["Name"]
+    first_name = exons[0].attributes["Name"][0]
+    for i in inter[1:]:
+        if "Name" in i.attributes:
+            assert first_name not in i.attributes["Name"], str(i)
 def test_pr_144():
@@ -399,3 +400,30 @@ def test_issue_181():
         return ','.join(f['ID'])
     db.update(introns, id_spec={'intron': [intron_id]})
+def test_issue_197():
+    # Previously this would fail with ValueError due to using the stop position
+    # of the last item on the previous chrom as the start position.
+    db = gffutils.create_db(gffutils.example_filename('issue_197.gff'), ':memory:', merge_strategy='error')
+    genes = list(db.features_of_type('gene'))
+    igss = list( db.interfeatures(genes,new_featuretype='intergenic_space') )
+    def transform(f):
+        f['ID'] = [ '-'.join(f.attributes['ID']) ]
+        return f
+    db = db.update(igss, transform=transform, merge_strategy='error')
+    obs = list(db.features_of_type('intergenic_space'))
+    for i in obs:
+        print(i)
+    assert [str(i) for i in obs] == [
+        'tig00000492\tgffutils_derived\tintergenic_space\t47236\t47350\t.\t-\t.\tID=ctg492.gene0001-ctg492.gene0002;Name=gene0001,gene0002',
+        'tig00000492\tgffutils_derived\tintergenic_space\t48257\t49999\t.\t-\t.\tID=ctg492.gene0002-gene0;Name=gene0002',
+        'tig00000492\tgffutils_derived\tintergenic_space\t50050\t50054\t.\t-\t.\tID=gene3-gene4',
+        'tig00000492\tgffutils_derived\tintergenic_space\t50071\t50071\t.\t-\t.\tID=gene4-gene5',
+        'tig00000492\tgffutils_derived\tintergenic_space\t50076\t50089\t.\t-\t.\tID=gene5-gene6',
+    ]

@@ -1 +1 @@
-version = "0.11.0"
+version = "0.11.1"

View it on GitLab: https://salsa.debian.org/med-team/python-gffutils/-/compare/d8b4386f15ed594a0527f20d6eafbcb7c6118b77...67b8df0b39fb6ba281acf508281d6b6de418c3b3

View it on GitLab: https://salsa.debian.org/med-team/python-gffutils/-/compare/d8b4386f15ed594a0527f20d6eafbcb7c6118b77...67b8df0b39fb6ba281acf508281d6b6de418c3b3
You're receiving this email because of your account on salsa.debian.org.

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20221013/25560284/attachment-0001.htm>

More information about the debian-med-commit mailing list