[med-svn] [Git][med-team/python-gffutils][upstream] New upstream version 0.13
Alexandre Detiste (@detiste-guest)
gitlab at salsa.debian.org
Tue Apr 16 07:49:09 BST 2024
Alexandre Detiste pushed to branch upstream at Debian Med / python-gffutils
Commits:
26f906ca by Alexandre Detiste at 2024-04-13T20:30:45+02:00
New upstream version 0.13
- - - - -
26 changed files:
- PKG-INFO
- gffutils.egg-info/PKG-INFO
- gffutils.egg-info/SOURCES.txt
- gffutils.egg-info/requires.txt
- gffutils/attributes.py
- gffutils/biopython_integration.py
- gffutils/convert.py
- gffutils/create.py
- gffutils/feature.py
- gffutils/gffwriter.py
- gffutils/helpers.py
- gffutils/interface.py
- gffutils/iterators.py
- gffutils/parser.py
- gffutils/pybedtools_integration.py
- gffutils/scripts/gffutils-cli
- gffutils/test/conftest.py
- + gffutils/test/data/FBgn0031208.gtf.db
- − gffutils/test/data/a.py
- gffutils/test/feature_test.py
- gffutils/test/test_1.py
- gffutils/test/test_biopython_integration.py
- + gffutils/test/test_cli.py
- gffutils/test/test_issues.py
- gffutils/version.py
- requirements.txt
Changes:
=====================================
PKG-INFO
=====================================
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gffutils
-Version: 0.12
+Version: 0.13
Summary: Work with GFF and GTF files in a flexible database framework
Home-page: https://github.com/daler/gffutils
Author: Ryan Dale
@@ -19,6 +19,10 @@ Classifier: Programming Language :: Python :: 3.5
Classifier: Programming Language :: Python :: 3.6
Classifier: Topic :: Software Development :: Libraries :: Python Modules
License-File: LICENSE
+Requires-Dist: pyfaidx>=0.5.5.2
+Requires-Dist: argh>=0.26.2
+Requires-Dist: argcomplete>=1.9.4
+Requires-Dist: simplejson
gffutils
========
=====================================
gffutils.egg-info/PKG-INFO
=====================================
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gffutils
-Version: 0.12
+Version: 0.13
Summary: Work with GFF and GTF files in a flexible database framework
Home-page: https://github.com/daler/gffutils
Author: Ryan Dale
@@ -19,6 +19,10 @@ Classifier: Programming Language :: Python :: 3.5
Classifier: Programming Language :: Python :: 3.6
Classifier: Topic :: Software Development :: Libraries :: Python Modules
License-File: LICENSE
+Requires-Dist: pyfaidx>=0.5.5.2
+Requires-Dist: argh>=0.26.2
+Requires-Dist: argcomplete>=1.9.4
+Requires-Dist: simplejson
gffutils
========
=====================================
gffutils.egg-info/SOURCES.txt
=====================================
@@ -40,6 +40,7 @@ gffutils/test/performance_evaluation.py
gffutils/test/synth_test_base.py
gffutils/test/test_1.py
gffutils/test/test_biopython_integration.py
+gffutils/test/test_cli.py
gffutils/test/test_issues.py
gffutils/test/test_iterators.py
gffutils/test/test_merge.py
@@ -47,10 +48,10 @@ gffutils/test/test_merge_all.py
gffutils/test/data/F3-unique-3.v2.gff
gffutils/test/data/FBgn0031208.gff
gffutils/test/data/FBgn0031208.gtf
+gffutils/test/data/FBgn0031208.gtf.db
gffutils/test/data/Saccharomyces_cerevisiae.R64-1-1.83.5000_gene_ids.txt
gffutils/test/data/Saccharomyces_cerevisiae.R64-1-1.83.5000_transcript_ids.txt
gffutils/test/data/Saccharomyces_cerevisiae.R64-1-1.83.chromsizes.txt
-gffutils/test/data/a.py
gffutils/test/data/c_elegans_WS199_ann_gff.txt
gffutils/test/data/c_elegans_WS199_dna_shortened.fa
gffutils/test/data/c_elegans_WS199_shortened_gff.txt
=====================================
gffutils.egg-info/requires.txt
=====================================
@@ -1,5 +1,4 @@
pyfaidx>=0.5.5.2
-six>=1.12.0
argh>=0.26.2
argcomplete>=1.9.4
simplejson
=====================================
gffutils/attributes.py
=====================================
@@ -1,4 +1,3 @@
-import six
import collections
try:
@@ -95,7 +94,7 @@ class Attributes(collectionsAbc.MutableMapping):
return "\n".join(s)
def update(self, *args, **kwargs):
- for k, v in six.iteritems(dict(*args, **kwargs)):
+ for k, v in dict(*args, **kwargs).items():
self[k] = v
=====================================
gffutils/biopython_integration.py
=====================================
@@ -2,7 +2,6 @@
Module for integration with BioPython, specifically SeqRecords and SeqFeature
objects.
"""
-import six
try:
from Bio.SeqFeature import SeqFeature, FeatureLocation
@@ -15,7 +14,8 @@ from .feature import Feature, feature_from_line
_biopython_strand = {
"+": 1,
"-": -1,
- ".": 0,
+ ".": None,
+ "?": 0,
}
_feature_strand = dict((v, k) for k, v in _biopython_strand.items())
@@ -33,7 +33,7 @@ def to_seqfeature(feature):
If string, assume it is a GFF or GTF-format line; otherwise just use
the provided feature directly.
"""
- if isinstance(feature, six.string_types):
+ if isinstance(feature, str):
feature = feature_from_line(feature)
qualifiers = {
@@ -46,10 +46,11 @@ def to_seqfeature(feature):
return SeqFeature(
# Convert from GFF 1-based to standard Python 0-based indexing used by
# BioPython
- FeatureLocation(feature.start - 1, feature.stop),
+ FeatureLocation(
+ feature.start - 1, feature.stop, strand=_biopython_strand[feature.strand]
+ ),
id=feature.id,
type=feature.featuretype,
- strand=_biopython_strand[feature.strand],
qualifiers=qualifiers,
)
@@ -66,12 +67,12 @@ def from_seqfeature(s, **kwargs):
score = s.qualifiers.get("score", ".")[0]
seqid = s.qualifiers.get("seqid", ".")[0]
frame = s.qualifiers.get("frame", ".")[0]
- strand = _feature_strand[s.strand]
+ strand = _feature_strand[s.location.strand]
# BioPython parses 1-based GenBank positions into 0-based for use within
# Python. We need to convert back to 1-based GFF format here.
- start = s.location.start.position + 1
- stop = s.location.end.position
+ start = s.location.start + 1
+ stop = s.location.end
featuretype = s.type
id = s.id
attributes = dict(s.qualifiers)
=====================================
gffutils/convert.py
=====================================
@@ -2,8 +2,6 @@
Conversion functions that operate on :class:`FeatureDB` classes.
"""
-import six
-
def to_bed12(f, db, child_type="exon", name_field="ID"):
"""
@@ -22,7 +20,7 @@ def to_bed12(f, db, child_type="exon", name_field="ID"):
Attribute to be used in the "name" field of the BED12 entry. Usually
"ID" for GFF; "transcript_id" for GTF.
"""
- if isinstance(f, six.string_types):
+ if isinstance(f, str):
f = db[f]
children = list(db.children(f, featuretype=child_type, order_by="start"))
sizes = [len(i) for i in children]
=====================================
gffutils/create.py
=====================================
@@ -5,7 +5,6 @@ import tempfile
import sys
import os
import sqlite3
-import six
from textwrap import dedent
from gffutils import constants
from gffutils import version
@@ -119,7 +118,7 @@ class _DBCreator(object):
os.unlink(dbfn)
self.dbfn = dbfn
self.id_spec = id_spec
- if isinstance(dbfn, six.string_types):
+ if isinstance(dbfn, str):
conn = sqlite3.connect(dbfn)
else:
conn = dbfn
@@ -171,7 +170,7 @@ class _DBCreator(object):
"""
# If id_spec is a string or callable, convert to iterable for later
- if isinstance(self.id_spec, six.string_types):
+ if isinstance(self.id_spec, str):
id_key = [self.id_spec]
elif hasattr(self.id_spec, "__call__"):
id_key = [self.id_spec]
@@ -181,7 +180,7 @@ class _DBCreator(object):
elif isinstance(self.id_spec, dict):
try:
id_key = self.id_spec[f.featuretype]
- if isinstance(id_key, six.string_types):
+ if isinstance(id_key, str):
id_key = [id_key]
# Otherwise, use default auto-increment.
@@ -217,7 +216,8 @@ class _DBCreator(object):
"a single value is required for a primary key in the "
"database. Consider using a custom id_spec to "
"convert these multiple values into a single "
- "value".format(k))
+ "value".format(k)
+ )
except KeyError:
pass
try:
@@ -684,7 +684,7 @@ class _GFFDBCreator(_DBCreator):
# c.execute('CREATE INDEX childindex ON relations (child)')
# self.conn.commit()
- if isinstance(self._keep_tempfiles, six.string_types):
+ if isinstance(self._keep_tempfiles, str):
suffix = self._keep_tempfiles
else:
suffix = ".gffutils"
@@ -883,7 +883,7 @@ class _GTFDBCreator(_DBCreator):
msg = "transcript"
logger.info("Inferring %s extents " "and writing to tempfile" % msg)
- if isinstance(self._keep_tempfiles, six.string_types):
+ if isinstance(self._keep_tempfiles, str):
suffix = self._keep_tempfiles
else:
suffix = ".gffutils"
=====================================
gffutils/feature.py
=====================================
@@ -1,5 +1,4 @@
from pyfaidx import Fasta
-import six
import simplejson as json
from gffutils import constants
from gffutils import helpers
@@ -166,7 +165,7 @@ class Feature(object):
# for testing.
attributes = attributes or dict_class()
- if isinstance(attributes, six.string_types):
+ if isinstance(attributes, str):
try:
attributes = helpers._unjsonify(attributes, isattributes=True)
@@ -182,7 +181,7 @@ class Feature(object):
# If string, then try un-JSONifying it into a list; if that doesn't
# work then assume it's tab-delimited and convert to a list.
extra = extra or []
- if isinstance(extra, six.string_types):
+ if isinstance(extra, str):
try:
extra = helpers._unjsonify(extra)
except json.JSONDecodeError:
@@ -254,10 +253,7 @@ class Feature(object):
self.attributes[key] = value
def __str__(self):
- if six.PY3:
- return self.__unicode__()
- else:
- return unicode(self).encode("utf-8")
+ return self.__unicode__()
def __unicode__(self):
@@ -387,7 +383,7 @@ class Feature(object):
-------
string
"""
- if isinstance(fasta, six.string_types):
+ if isinstance(fasta, str):
fasta = Fasta(fasta, as_raw=False)
# recall GTF/GFF is 1-based closed; pyfaidx uses Python slice notation
=====================================
gffutils/gffwriter.py
=====================================
@@ -1,7 +1,6 @@
##
## GFF Writer (writer): serializing gffutils records as GFF text files.
##
-import six
import tempfile
import shutil
from time import strftime, localtime
@@ -41,7 +40,7 @@ class GFFWriter:
self.temp_file = None
# Output stream to write to
self.out_stream = None
- if isinstance(out, six.string_types):
+ if isinstance(out, str):
if self.in_place:
# Use temporary file
self.temp_file = tempfile.NamedTemporaryFile(delete=False)
=====================================
gffutils/helpers.py
=====================================
@@ -4,7 +4,6 @@ import os
import simplejson as json
import time
import tempfile
-import six
from gffutils import constants
from gffutils import bins
import gffutils
@@ -202,7 +201,7 @@ def make_query(
# e.g., "featuretype = 'exon'"
#
# or, "featuretype IN ('exon', 'CDS')"
- if isinstance(featuretype, six.string_types):
+ if isinstance(featuretype, str):
d["FEATURETYPE"] = "features.featuretype = ?"
args.append(featuretype)
else:
@@ -218,7 +217,7 @@ def make_query(
# `limit` is a string or a tuple of (chrom, start, stop)
#
# e.g., "seqid = 'chr2L' AND start > 1000 AND end < 5000"
- if isinstance(limit, six.string_types):
+ if isinstance(limit, str):
seqid, startstop = limit.split(":")
start, end = startstop.split("-")
else:
@@ -257,7 +256,7 @@ def make_query(
# Default is essentially random order.
#
# e.g. "ORDER BY seqid, start DESC"
- if isinstance(order_by, six.string_types):
+ if isinstance(order_by, str):
_order_by.append(order_by)
else:
@@ -387,7 +386,7 @@ def merge_attributes(attr1, attr2, numeric_sort=False):
if not isinstance(v, list):
new_d[k] = [v]
- for k, v in six.iteritems(attr1):
+ for k, v in attr1.items():
if k in attr2:
if not isinstance(v, list):
v = [v]
@@ -507,9 +506,9 @@ def is_gff_db(db_fname):
def to_unicode(obj, encoding="utf-8"):
- if isinstance(obj, six.string_types):
- if not isinstance(obj, six.text_type):
- obj = six.text_type(obj, encoding)
+ if isinstance(obj, str):
+ if not isinstance(obj, str):
+ obj = str(obj, encoding)
return obj
@@ -520,7 +519,6 @@ def canonical_transcripts(db, fasta_filename):
"""
import pyfaidx
-
fasta = pyfaidx.Fasta(fasta_filename, as_raw=False)
for gene in db.features_of_type("gene"):
@@ -536,7 +534,20 @@ def canonical_transcripts(db, fasta_filename):
cds_len += exon_length
total_len += exon_length
- exon_list.append((cds_len, total_len, transcript, exons if cds_len == 0 else [e for e in exons if e.featuretype in ['CDS', 'five_prime_UTR', 'three_prime_UTR']]))
+ exon_list.append(
+ (
+ cds_len,
+ total_len,
+ transcript,
+ exons
+ if cds_len == 0
+ else [
+ e
+ for e in exons
+ if e.featuretype in ["CDS", "five_prime_UTR", "three_prime_UTR"]
+ ],
+ )
+ )
# If we have CDS, then use the longest coding transcript
if max(i[0] for i in exon_list) > 0:
@@ -549,7 +560,12 @@ def canonical_transcripts(db, fasta_filename):
canonical_exons = best[-1]
transcript = best[-2]
- seqs = [i.sequence(fasta) for i in sorted(canonical_exons, key=lambda x: x.start, reverse=transcript.strand != '+')]
+ seqs = [
+ i.sequence(fasta)
+ for i in sorted(
+ canonical_exons, key=lambda x: x.start, reverse=transcript.strand != "+"
+ )
+ ]
yield transcript, "".join(seqs)
=====================================
gffutils/interface.py
=====================================
@@ -1,6 +1,5 @@
import collections
import os
-import six
import sqlite3
import shutil
import warnings
@@ -102,7 +101,7 @@ class FeatureDB(object):
keep_order=False,
pragmas=constants.default_pragmas,
sort_attribute_values=False,
- text_factory=str
+ text_factory=str,
):
"""
Connect to a database created by :func:`gffutils.create_db`.
@@ -694,7 +693,7 @@ class FeatureDB(object):
"If region is supplied, do not supply seqid, "
"start, or end as separate kwargs"
)
- if isinstance(region, six.string_types):
+ if isinstance(region, str):
toks = region.split(":")
if len(toks) == 1:
seqid = toks[0]
@@ -774,7 +773,7 @@ class FeatureDB(object):
# Add the featuretype clause
if featuretype is not None:
- if isinstance(featuretype, six.string_types):
+ if isinstance(featuretype, str):
featuretype = [featuretype]
feature_clause = " or ".join(["featuretype = ?" for _ in featuretype])
query += " AND (%s) " % feature_clause
@@ -872,10 +871,21 @@ class FeatureDB(object):
Used to initialize a new interfeature that is ready to be updated
in-place.
"""
- keys = ['id', 'seqid', 'source', 'featuretype', 'start', 'end',
- 'score', 'strand', 'frame', 'attributes', 'bin']
+ keys = [
+ "id",
+ "seqid",
+ "source",
+ "featuretype",
+ "start",
+ "end",
+ "score",
+ "strand",
+ "frame",
+ "attributes",
+ "bin",
+ ]
d = dict(zip(keys, f.astuple()))
- d['source'] = 'gffutils_derived'
+ d["source"] = "gffutils_derived"
return d
def _prep_for_yield(d):
@@ -886,12 +896,12 @@ class FeatureDB(object):
If start is greater than stop (which happens when trying to get
interfeatures for overlapping features), then return None.
"""
- d['start'] += 1
- d['end'] -= 1
- new_bin = bins.bins(d['start'], d['end'], one=True)
- d['bin'] = new_bin
+ d["start"] += 1
+ d["end"] -= 1
+ new_bin = bins.bins(d["start"], d["end"], one=True)
+ d["bin"] = new_bin
- if d['start'] > d['end']:
+ if d["start"] > d["end"]:
return None
new_feature = self._feature_returner(**d)
@@ -899,12 +909,13 @@ class FeatureDB(object):
# concat list of ID to create uniq IDs because feature with
# multiple values for their ID are no longer permitted since v0.11
if "ID" in new_feature.attributes and len(new_feature.attributes["ID"]) > 1:
- new_id = '-'.join(new_feature.attributes["ID"])
+ new_id = "-".join(new_feature.attributes["ID"])
new_feature.attributes["ID"] = [new_id]
return new_feature
# If not provided, use a no-op function instead.
if not attribute_func:
+
def attribute_func(a):
return a
@@ -933,23 +944,23 @@ class FeatureDB(object):
nfeatures += 1
# Adjust the interfeature dict in-place with coords...
- interfeature['start'] = last_feature.stop
- interfeature['end'] = f.start
+ interfeature["start"] = last_feature.stop
+ interfeature["end"] = f.start
# ...featuretype
if new_featuretype is None:
- interfeature['featuretype'] = "inter_%s_%s" % (
+ interfeature["featuretype"] = "inter_%s_%s" % (
last_feature.featuretype,
f.featuretype,
)
else:
- interfeature['featuretype'] = new_featuretype
+ interfeature["featuretype"] = new_featuretype
# ...strand
if last_feature.strand != f.strand:
- interfeature['strand'] = '.'
+ interfeature["strand"] = "."
else:
- interfeature['strand'] = f.strand
+ interfeature["strand"] = f.strand
# and attributes
if merge_attributes:
@@ -964,7 +975,7 @@ class FeatureDB(object):
if update_attributes:
new_attributes.update(update_attributes)
- interfeature['attributes'] = new_attributes
+ interfeature["attributes"] = new_attributes
# Ready to yield
new_feature = _prep_for_yield(interfeature)
@@ -994,7 +1005,7 @@ class FeatureDB(object):
FeatureDB object, with features deleted.
"""
if make_backup:
- if isinstance(self.dbfn, six.string_types):
+ if isinstance(self.dbfn, str):
shutil.copy2(self.dbfn, self.dbfn + ".bak")
c = self.conn.cursor()
@@ -1006,12 +1017,12 @@ class FeatureDB(object):
"""
if isinstance(features, FeatureDB):
features = features.all_features()
- if isinstance(features, six.string_types):
+ if isinstance(features, str):
features = [features]
if isinstance(features, Feature):
features = [features]
for feature in features:
- if isinstance(feature, six.string_types):
+ if isinstance(feature, str):
_id = feature
else:
_id = feature.id
@@ -1027,7 +1038,13 @@ class FeatureDB(object):
WARNING: If you used any non-default kwargs for gffutils.create_db when
creating the database in the first place (especially
`disable_infer_transcripts` or `disable_infer_genes`) then you should
- use those same arguments here.
+ use those same arguments here. The exception is the `force` argument
+ though -- in some cases including that can truncate the database.
+
+ WARNING: If you are creating features from the database and writing
+ immediately back to the database, you could experience deadlocks. See
+ the help for `create_introns` for some different options for avoiding
+ this.
The returned object is the same FeatureDB, but since it is pointing to
the same database and that has been just updated, the new features can
@@ -1060,7 +1077,7 @@ class FeatureDB(object):
from gffutils import iterators
if make_backup:
- if isinstance(self.dbfn, six.string_types):
+ if isinstance(self.dbfn, str):
shutil.copy2(self.dbfn, self.dbfn + ".bak")
# get iterator-specific kwargs
@@ -1139,9 +1156,9 @@ class FeatureDB(object):
-------
FeatureDB object with new relations added.
"""
- if isinstance(parent, six.string_types):
+ if isinstance(parent, str):
parent = self[parent]
- if isinstance(child, six.string_types):
+ if isinstance(child, str):
child = self[child]
c = self.conn.cursor()
@@ -1234,9 +1251,42 @@ class FeatureDB(object):
-----
The returned generator can be passed directly to the
:meth:`FeatureDB.update` method to permanently add them to the
- database, e.g., ::
+ database. However, this needs to be done carefully to avoid deadlocks
+ from simultaneous reading/writing.
+
+ When using `update()` you should also use the same keyword arguments
+ used to create the db in the first place (with the exception of `force`).
+
+ Here are three options for getting the introns back into the database,
+ depending on the circumstances.
+
+ **OPTION 1: Create list of introns.**
+
+ Consume the `create_introns()` generator completely before writing to
+ the database. If you have sufficient memory, this is the easiest
+ option::
+
+ db.update(list(db.create_introns(**intron_kwargs)), **create_kwargs)
+
+ **OPTION 2: Use `WAL <https://sqlite.org/wal.html>`__**
+
+ The WAL pragma enables simultaneous read/write. WARNING: this does not
+ work if the database is on a networked filesystem, like those used on
+ many HPC clusters.
+
+ ::
- db.update(db.create_introns())
+ db.set_pragmas({"journal_mode": "WAL"})
+ db.update(db.create_introns(**intron_kwargs), **create_kwargs)
+
+ **OPTION 3: Write to intermediate file.**
+
+ Use this if you are memory limited and using a networked filesystem::
+
+ with open('tmp.gtf', 'w') as fout:
+ for intron in db.create_introns(**intron_kwargs):
+ fout.write(str(intron) + "\n")
+ db.update(gffutils.DataIterator('tmp.gtf'), **create_kwargs)
"""
if (grandparent_featuretype and parent_featuretype) or (
@@ -1390,11 +1440,12 @@ class FeatureDB(object):
splice_site.start = splice_site.end - 1
# make ID uniq by adding suffix
- splice_site.attributes["ID"] = [new_featuretype + "_" + splice_site.attributes["ID"][0]]
+ splice_site.attributes["ID"] = [
+ new_featuretype + "_" + splice_site.attributes["ID"][0]
+ ]
yield splice_site
-
def _old_merge(self, features, ignore_strand=False):
"""
DEPRECATED, only retained here for backwards compatibility. Please use
@@ -1710,10 +1761,12 @@ class FeatureDB(object):
return result_features
def children_bp(
- self, feature, child_featuretype="exon", merge=False,
- merge_criteria=(mc.seqid, mc.overlap_end_inclusive, mc.strand,
- mc.feature_type),
- **kwargs
+ self,
+ feature,
+ child_featuretype="exon",
+ merge=False,
+ merge_criteria=(mc.seqid, mc.overlap_end_inclusive, mc.strand, mc.feature_type),
+ **kwargs
):
"""
Total bp of all children of a featuretype.
@@ -1752,9 +1805,14 @@ class FeatureDB(object):
raise ValueError(
"'ignore_strand' has been deprecated; please use "
"merge_criteria to control how features should be merged. "
- "E.g., leave out the 'mc.strand' criteria to ignore strand.")
+ "E.g., leave out the 'mc.strand' criteria to ignore strand."
+ )
else:
- raise TypeError("merge() got unexpected keyword arguments '{}'".format(kwargs.keys()))
+ raise TypeError(
+ "merge() got unexpected keyword arguments '{}'".format(
+ kwargs.keys()
+ )
+ )
children = self.children(
feature, featuretype=child_featuretype, order_by="start"
@@ -1941,7 +1999,6 @@ class FeatureDB(object):
for (i,) in c:
yield i
-
# Recycle the docs for _relation so they stay consistent between parents()
# and children()
children.__doc__ = children.__doc__.format(_relation_docstring=_relation.__doc__)
=====================================
gffutils/iterators.py
=====================================
@@ -16,13 +16,8 @@ from gffutils.feature import feature_from_line
from gffutils.interface import FeatureDB
from gffutils import helpers
from textwrap import dedent
-import six
-from six.moves.urllib.request import urlopen
-
-if six.PY3:
- from urllib import parse as urlparse
-else:
- import urlparse
+from urllib.request import urlopen
+from urllib import parse as urlparse
class Directive(object):
@@ -133,7 +128,7 @@ class _FileIterator(_BaseIterator):
valid_lines = 0
with self.open_function(self.data) as fh:
for i, line in enumerate(fh):
- if isinstance(line, six.binary_type):
+ if isinstance(line, bytes):
line = line.decode("utf-8")
line = line.rstrip("\n\r")
self.current_item = line
@@ -295,11 +290,11 @@ def DataIterator(
force_dialect_check=force_dialect_check,
**kwargs,
)
- if isinstance(data, six.string_types):
+ if isinstance(data, str):
if from_string:
tmp = tempfile.NamedTemporaryFile(delete=False)
data = dedent(data)
- if isinstance(data, six.text_type):
+ if isinstance(data, str):
data = data.encode("utf-8")
tmp.write(data)
tmp.close()
=====================================
gffutils/parser.py
=====================================
@@ -3,7 +3,7 @@
import re
import copy
import collections
-from six.moves import urllib
+import urllib
from gffutils import constants
from gffutils.exceptions import AttributeStringError
=====================================
gffutils/pybedtools_integration.py
=====================================
@@ -6,7 +6,6 @@ import os
import pybedtools
from pybedtools import featurefuncs
from gffutils import helpers
-import six
def to_bedtool(iterator):
@@ -210,7 +209,7 @@ def tsses(
if merge_overlapping or as_bed6:
- if isinstance(attrs, six.string_types):
+ if isinstance(attrs, str):
attrs = [attrs]
def to_bed(f):
=====================================
gffutils/scripts/gffutils-cli
=====================================
@@ -1,7 +1,5 @@
#!/usr/bin/python
-from __future__ import print_function
-
"""
Command line interface for gffutils.
=====================================
gffutils/test/conftest.py
=====================================
@@ -1 +1 @@
-collect_ignore=["data"]
+collect_ignore = ["data"]
=====================================
gffutils/test/data/FBgn0031208.gtf.db
=====================================
Binary files /dev/null and b/gffutils/test/data/FBgn0031208.gtf.db differ
=====================================
gffutils/test/data/a.py deleted
=====================================
@@ -1,22 +0,0 @@
-import gffutils
-
-db = gffutils.create_db('issue_197.gff', ':memory:', merge_strategy='error')
-genes = list(db.features_of_type('gene'))
-
-genes = list(db.merge(genes))
-
-igss = list( db.interfeatures(genes,new_featuretype='intergenic_space') )
-
-def transform(f):
- f['ID'] = [ '-'.join(f.attributes['ID']) ]
- return f
-
-print('------')
-for i in igss:
- print(transform(i))
-print('------')
-
-db = db.update(igss, transform=transform, merge_strategy='error')
-
-for i in db.all_features(order_by=('seqid', 'start')):
- print(i)
=====================================
gffutils/test/feature_test.py
=====================================
@@ -152,10 +152,10 @@ class IsolatedTestCase(object):
namespace or something? Anyway, these setup/teardowns do the trick.
"""
- def setup(self):
+ def setup_method(self):
constants.always_return_list = False
- def teardown(self):
+ def teardown_method(self):
constants.always_return_list = True
def test_feature_single_item(self):
=====================================
gffutils/test/test_1.py
=====================================
@@ -9,11 +9,10 @@ import gffutils.inspect as inspect
import gffutils.iterators as iterators
import sys
import os
-import six
import shutil
import threading
import tempfile
-from six.moves import SimpleHTTPServer
+import http.server as SimpleHTTPServer
if sys.version_info.major == 3:
import socketserver as SocketServer
@@ -193,7 +192,7 @@ class BaseDB(object):
orig_fn = None
- def setup(self):
+ def setup_method(self):
def gff_id_func(f):
if "ID" in f.attributes:
return f.attributes["ID"][0]
@@ -395,7 +394,7 @@ def test_gffwriter():
), "unsanitized.gff should not have a gffutils-style header."
db_in = gffutils.create_db(fn, ":memory:", keep_order=True)
# Fetch first record
- rec = six.next(db_in.all_features())
+ rec = next(db_in.all_features())
##
## Write GFF file in-place test
##
@@ -551,7 +550,7 @@ def test_nonascii():
# ...but fails using plain nosetests or when using regular Python
# interpreter
except UnicodeEncodeError:
- print(six.text_type(i))
+ print(str(i))
def test_feature_merge():
@@ -637,7 +636,7 @@ def test_feature_merge():
id_spec="gene_id",
force_merge_fields=["start"],
keep_order=True,
- )
+ )
# test that warnings are raised because of strand and frame
with warnings.catch_warnings(record=True) as w:
@@ -903,7 +902,7 @@ def test_iterator_update():
db.update(gen(), merge_strategy="replace")
assert len(list(db.all_features())) == 12
assert len(list(db.features_of_type("gene"))) == 1
- g = six.next(db.features_of_type("gene"))
+ g = next(db.features_of_type("gene"))
assert g.start == 1, g.start
assert g.stop == 100, g.stop
@@ -924,7 +923,7 @@ def test_iterator_update():
)
assert len(list(db.all_features())) == 12
assert len(list(db.features_of_type("gene"))) == 1
- g = six.next(db.features_of_type("gene"))
+ g = next(db.features_of_type("gene"))
print(g)
assert g.start == 1, g.start
assert g.stop == 100, g.stop
@@ -934,17 +933,19 @@ def test_iterator_update():
[(i.start, i.stop) for i in db.features_of_type("exon")]
)
+
def clean_tempdir():
tempfile.tempdir = tempdir
if os.path.exists(tempdir):
shutil.rmtree(tempdir)
os.makedirs(tempdir)
+
# specify a writeable temp dir for testing
tempdir = "/tmp/gffutils-test"
-def test_tempfiles():
+def test_tempfiles():
clean_tempdir()
@@ -992,6 +993,7 @@ def test_tempfiles():
assert len(filelist) == 1, filelist
assert filelist[0].endswith(".GFFtmp")
+
@pytest.mark.skip(reason="Unclear if still needed; currently failing")
def test_parallel_db():
# DISABLING in v0.12
@@ -1114,7 +1116,7 @@ def test_deprecation_handler():
gffutils.example_filename("FBgn0031208.gtf"),
":memory:",
infer_gene_extent=False,
- )
+ )
def test_nonsense_kwarg():
@@ -1123,7 +1125,7 @@ def test_nonsense_kwarg():
gffutils.example_filename("FBgn0031208.gtf"),
":memory:",
asdf=True,
- )
+ )
def test_infer_gene_extent():
@@ -1242,7 +1244,8 @@ def test_create_splice_sites():
db = gffutils.create_db(fn, ":memory:")
db = db.update(db.create_splice_sites())
observed = "\n".join(str(feature) for feature in db.all_features())
- expected = dedent("""\
+ expected = dedent(
+ """\
chr1 ensGene gene 4763287 4775820 . - . Name=ENSMUSG00000033845;ID=ENSMUSG00000033845;Alias=ENSMUSG00000033845;gid=ENSMUSG00000033845
chr1 ensGene mRNA 4764517 4775779 . - . Name=ENSMUST00000045689;Parent=ENSMUSG00000033845;ID=ENSMUST00000045689;Alias=ENSMUSG00000033845;gid=ENSMUSG00000033845
chr1 ensGene CDS 4775654 4775758 . - 0 Name=ENSMUST00000045689.cds0;Parent=ENSMUST00000045689;ID=ENSMUST00000045689.cds0;gid=ENSMUSG00000033845
@@ -1260,13 +1263,12 @@ def test_create_splice_sites():
chr1 gffutils_derived three_prime_cis_splice_site 4772815 4772816 . - . Name=ENSMUST00000045689.exon0,ENSMUST00000045689.exon1;Parent=ENSMUST00000045689;ID=three_prime_cis_splice_site_ENSMUST00000045689.exon0-ENSMUST00000045689.exon1;gid=ENSMUSG00000033845
chr1 gffutils_derived five_prime_cis_splice_site 4767604 4767605 . - . Name=ENSMUST00000045689.exon2,ENSMUST00000045689.exon3;Parent=ENSMUST00000045689;ID=five_prime_cis_splice_site_ENSMUST00000045689.exon2-ENSMUST00000045689.exon3;gid=ENSMUSG00000033845
chr1 gffutils_derived five_prime_cis_splice_site 4772647 4772648 . - . Name=ENSMUST00000045689.exon1,ENSMUST00000045689.exon2;Parent=ENSMUST00000045689;ID=five_prime_cis_splice_site_ENSMUST00000045689.exon1-ENSMUST00000045689.exon2;gid=ENSMUSG00000033845
- chr1 gffutils_derived five_prime_cis_splice_site 4775652 4775653 . - . Name=ENSMUST00000045689.exon0,ENSMUST00000045689.exon1;Parent=ENSMUST00000045689;ID=five_prime_cis_splice_site_ENSMUST00000045689.exon0-ENSMUST00000045689.exon1;gid=ENSMUSG00000033845""")
+ chr1 gffutils_derived five_prime_cis_splice_site 4775652 4775653 . - . Name=ENSMUST00000045689.exon0,ENSMUST00000045689.exon1;Parent=ENSMUST00000045689;ID=five_prime_cis_splice_site_ENSMUST00000045689.exon0-ENSMUST00000045689.exon1;gid=ENSMUSG00000033845"""
+ )
assert observed == expected
-
-
if __name__ == "__main__":
# this test case fails
# test_attributes_modify()
=====================================
gffutils/test/test_biopython_integration.py
=====================================
@@ -13,8 +13,8 @@ def test_roundtrip():
feature.keep_order = True
dialect = feature.dialect
s = bp.to_seqfeature(feature)
- assert s.location.start.position == feature.start - 1
- assert s.location.end.position == feature.stop
+ assert s.location.start == feature.start - 1
+ assert s.location.end == feature.stop
assert s.id == feature.id
f = bp.from_seqfeature(s, dialect=dialect, keep_order=True)
assert feature == f
=====================================
gffutils/test/test_cli.py
=====================================
@@ -0,0 +1,18 @@
+import subprocess as sp
+import gffutils
+from gffutils import example_filename, create, feature
+
+
+def test_issue_224():
+ fn = gffutils.example_filename("FBgn0031208.gtf")
+ sp.run(["gffutils-cli", "create", "--force", fn])
+ p = sp.run(
+ ["gffutils-cli", "children", fn + ".db", "FBgn0031208"],
+ check=True,
+ capture_output=True,
+ universal_newlines=True,
+ )
+ assert (
+ p.stdout.splitlines()[0]
+ == 'chr2L\tgffutils_derived\tgene\t7529\t9484\t.\t+\t.\tgene_id "FBgn0031208";'
+ )
=====================================
gffutils/test/test_issues.py
=====================================
@@ -13,6 +13,7 @@ from gffutils import merge_criteria as mc
import pytest
+
def test_issue_79():
gtf = gffutils.example_filename("keep-order-test.gtf")
db = gffutils.create_db(
@@ -291,41 +292,41 @@ def test_issue_128():
# In #128, some lines had separators of "; " and some with ";". The first
# one in the file would win. Now the detection pays more attention to lines
# with more attributes to make it work properly
- gff = gffutils.example_filename('gms2_example.gff3')
+ gff = gffutils.example_filename("gms2_example.gff3")
db = gffutils.create_db(gff, ":memory:", force=True)
expected = {
- 'ID': ['1'],
- 'Parent': ['gene_1'],
- 'gene_type': ['native'],
- 'partial': ['11'],
- 'gc': ['33'],
- 'length': ['363'],
+ "ID": ["1"],
+ "Parent": ["gene_1"],
+ "gene_type": ["native"],
+ "partial": ["11"],
+ "gc": ["33"],
+ "length": ["363"],
}
- assert dict(db['1'].attributes) == expected
+ assert dict(db["1"].attributes) == expected
def test_issue_157():
# With the merge overhaul, children_bp incorrectly still used ignore_strand.
- db = gffutils.create_db(gffutils.example_filename('FBgn0031208.gff'), ":memory:")
- gene = next(db.features_of_type('gene'))
- children = list(db.children(gene, featuretype='exon'))
+ db = gffutils.create_db(gffutils.example_filename("FBgn0031208.gff"), ":memory:")
+ gene = next(db.features_of_type("gene"))
+ children = list(db.children(gene, featuretype="exon"))
# Modify the last one to have a different strand so we can test the
# ignore_strand argument.
- children[-1].strand = '-'
- db.update(children[-1:], merge_strategy='replace')
+ children[-1].strand = "-"
+ db.update(children[-1:], merge_strategy="replace")
# and, since updating has been problematic in the past, double-check again
# that the strand is changed in the db.
- assert list(db.children(gene, featuretype='exon'))[-1].strand == '-'
- cbp1 = db.children_bp(gene, child_featuretype='exon')
+ assert list(db.children(gene, featuretype="exon"))[-1].strand == "-"
+ cbp1 = db.children_bp(gene, child_featuretype="exon")
# Previously this would give:
# TypeError: merge() got an unexpected keyword argument 'ignore_strand'
#
- # Now changing to ValueError and suggesting a fix.
+ # Now changing to ValueError and suggesting a fix.
with pytest.raises(ValueError):
- db.children_bp(gene, child_featuretype='exon', merge=True, ignore_strand=True)
+ db.children_bp(gene, child_featuretype="exon", merge=True, ignore_strand=True)
with pytest.raises(ValueError):
db.children_bp(gene, ignore_strand=True, nonexistent=True)
with pytest.raises(TypeError):
@@ -333,12 +334,17 @@ def test_issue_157():
# The way to do it now is the following (we can omit the mc.feature_type
# since we're preselecting for exons anyway):
- db.children_bp(gene, child_featuretype='exon', merge=True, merge_criteria=(mc.overlap_end_inclusive))
+ db.children_bp(
+ gene,
+ child_featuretype="exon",
+ merge=True,
+ merge_criteria=(mc.overlap_end_inclusive),
+ )
def test_issue_159():
- db = gffutils.create_db(gffutils.example_filename('FBgn0031208.gff'), ":memory:")
- fasta = gffutils.example_filename('dm6-chr2L.fa')
+ db = gffutils.create_db(gffutils.example_filename("FBgn0031208.gff"), ":memory:")
+ fasta = gffutils.example_filename("dm6-chr2L.fa")
for transcript, seq in gffutils.helpers.canonical_transcripts(db, fasta):
pass
@@ -347,56 +353,59 @@ def test_issue_164():
# Something strange with the original annotation, but seems fine at least
# after pasting in the offending genes from the GitHub comments.
db = gffutils.create_db(
- gffutils.example_filename('sharr.gtf'),
- ':memory:',
+ gffutils.example_filename("sharr.gtf"),
+ ":memory:",
disable_infer_transcripts=True,
disable_infer_genes=True,
- id_spec={'gene': 'gene_id', 'transcript': 'transcript_id'},
- merge_strategy='create_unique',
- keep_order=True)
+ id_spec={"gene": "gene_id", "transcript": "transcript_id"},
+ merge_strategy="create_unique",
+ keep_order=True,
+ )
def test_issue_166():
# Added the new FeatureDB.seqids() method.
- db = gffutils.create_db(gffutils.example_filename('nonascii'), ':memory:')
+ db = gffutils.create_db(gffutils.example_filename("nonascii"), ":memory:")
seqs = list(db.seqids())
- assert seqs == ['2L', '2R', '3L', '3R', 'X'], seqs
+ assert seqs == ["2L", "2R", "3L", "3R", "X"], seqs
def test_issue_167():
# Previously was causing sqlite3.InterfaceError
- db = gffutils.create_db(gffutils.example_filename('issue167.gff'), ':memory:')
+ db = gffutils.create_db(gffutils.example_filename("issue167.gff"), ":memory:")
def test_issue_174():
db = gffutils.create_db(
- gffutils.example_filename('issue174.gtf'),
- ':memory:',
- merge_strategy='warning',
+ gffutils.example_filename("issue174.gtf"),
+ ":memory:",
+ merge_strategy="warning",
)
introns = [f for f in db.create_introns()]
- observed = [i.attributes['exon_number'] for i in introns]
- assert observed[7] == ['8', '9']
- assert observed[8] == ['10', '9']
- assert observed[9] == ['10', '11']
+ observed = [i.attributes["exon_number"] for i in introns]
+ assert observed[7] == ["8", "9"]
+ assert observed[8] == ["10", "9"]
+ assert observed[9] == ["10", "11"]
# Now do the same thing, but with the new numeric_sort arg
introns = [f for f in db.create_introns(numeric_sort=True)]
- observed = [i.attributes['exon_number'] for i in introns]
- assert observed[7] == ['8', '9']
+ observed = [i.attributes["exon_number"] for i in introns]
+ assert observed[7] == ["8", "9"]
# This should be fixed:
- assert observed[8] == ['9', '10']
- assert observed[9] == ['10', '11']
+ assert observed[8] == ["9", "10"]
+ assert observed[9] == ["10", "11"]
+
def test_issue_197():
# Previously this would fail with ValueError due to using the stop position
# of the last item on the previous chrom as the start position.
- db = gffutils.create_db(gffutils.example_filename('issue_197.gff'), ':memory:', merge_strategy='error')
- genes = list(db.features_of_type('gene'))
- igss = list( db.interfeatures(genes,new_featuretype='intergenic_space') )
-
+ db = gffutils.create_db(
+ gffutils.example_filename("issue_197.gff"), ":memory:", merge_strategy="error"
+ )
+ genes = list(db.features_of_type("gene"))
+ igss = list(db.interfeatures(genes, new_featuretype="intergenic_space"))
# Prior to PR #219, multiple IDs could be created by interfeatures, which
# in turn was patched here by providing the transform to db.update. With
@@ -405,23 +414,24 @@ def test_issue_197():
#
# However, keeping the test as-is to ensure backward-compatibility.
def transform(f):
- f['ID'] = [ '-'.join(f.attributes['ID']) ]
+ f["ID"] = ["-".join(f.attributes["ID"])]
return f
- db = db.update(igss, transform=transform, merge_strategy='error')
+ db = db.update(igss, transform=transform, merge_strategy="error")
- obs = list(db.features_of_type('intergenic_space'))
+ obs = list(db.features_of_type("intergenic_space"))
for i in obs:
print(i)
assert [str(i) for i in obs] == [
- 'tig00000492\tgffutils_derived\tintergenic_space\t47236\t47350\t.\t-\t.\tID=ctg492.gene0001-ctg492.gene0002;Name=gene0001,gene0002',
- 'tig00000492\tgffutils_derived\tintergenic_space\t48257\t49999\t.\t-\t.\tID=ctg492.gene0002-gene0;Name=gene0002',
- 'tig00000492\tgffutils_derived\tintergenic_space\t50050\t50054\t.\t-\t.\tID=gene3-gene4',
- 'tig00000492\tgffutils_derived\tintergenic_space\t50071\t50071\t.\t-\t.\tID=gene4-gene5',
- 'tig00000492\tgffutils_derived\tintergenic_space\t50076\t50089\t.\t-\t.\tID=gene5-gene6',
+ "tig00000492\tgffutils_derived\tintergenic_space\t47236\t47350\t.\t-\t.\tID=ctg492.gene0001-ctg492.gene0002;Name=gene0001,gene0002",
+ "tig00000492\tgffutils_derived\tintergenic_space\t48257\t49999\t.\t-\t.\tID=ctg492.gene0002-gene0;Name=gene0002",
+ "tig00000492\tgffutils_derived\tintergenic_space\t50050\t50054\t.\t-\t.\tID=gene3-gene4",
+ "tig00000492\tgffutils_derived\tintergenic_space\t50071\t50071\t.\t-\t.\tID=gene4-gene5",
+ "tig00000492\tgffutils_derived\tintergenic_space\t50076\t50089\t.\t-\t.\tID=gene5-gene6",
]
+
def test_issue_198():
line = 'NC_000001.11 BestRefSeq gene 14362 29370 . - . gene_id "WASH7P"; transcript_id ""; db_xref "GeneID:653635"; db_xref "HGNC:HGNC:38034"; description "WASP family homolog 7, pseudogene"; gbkey "Gene"; gene "WASH7P"; gene_biotype "transcribed_pseudogene"; gene_synonym "FAM39F"; gene_synonym "WASH5P"; pseudo "true";'
@@ -440,7 +450,7 @@ def test_issue_198():
# of repeated keys always wins.
f = feature.feature_from_line(line)
- assert f.attributes['description'] == ['WASP family homolog 7, pseudogene']
+ assert f.attributes["description"] == ["WASP family homolog 7, pseudogene"]
# If we remove one of the db_xref keys, then the parser sees the comma and
# figures it's a multivalue key.
@@ -449,59 +459,62 @@ def test_issue_198():
# Previous result, note leading space --------------------------->| |
# assert f.attributes['description'] == ['WASP family homolog 7', ' pseudogene']
- assert f.attributes['description'] == ['WASP family homolog 7, pseudogene']
+ assert f.attributes["description"] == ["WASP family homolog 7, pseudogene"]
# But removing that space before "pseudogene" means it's interpreted as
# a multivalue attribute
line = 'NC_000001.11 BestRefSeq gene 14362 29370 . - . gene_id "WASH7P"; transcript_id ""; db_xref "GeneID:653635"; description "WASP family homolog 7,pseudogene"; gbkey "Gene"; gene "WASH7P"; gene_biotype "transcribed_pseudogene"; gene_synonym "FAM39F"; gene_synonym "WASH5P"; pseudo "true";'
f = feature.feature_from_line(line)
- assert f.attributes['description'] == ['WASP family homolog 7', 'pseudogene']
+ assert f.attributes["description"] == ["WASP family homolog 7", "pseudogene"]
# Confirm behavior of corner cases like a trailing comma
line = "chr17 RefSeq CDS 6806527 6806553 . + 0 Name=CDS:NC_000083.5:LOC100040603;Parent=XM_001475631.1,"
f = feature.feature_from_line(line)
- assert f.attributes['Parent'] == ['XM_001475631.1', '']
+ assert f.attributes["Parent"] == ["XM_001475631.1", ""]
def test_issue_207():
-
def _check(txt, expected_keys, dialect_trailing_semicolon):
- db = gffutils.create_db(txt.replace(' ', '\t'), ':memory:', from_string=True)
+ db = gffutils.create_db(txt.replace(" ", "\t"), ":memory:", from_string=True)
assert [list(f.attributes.keys()) for f in db.all_features()] == expected_keys
- assert db.dialect['trailing semicolon'] == dialect_trailing_semicolon
+ assert db.dialect["trailing semicolon"] == dialect_trailing_semicolon
# All lines have trailing semicolon
_check(
- txt=dedent("""\
+ txt=dedent(
+ """\
chr1 AUGUSTUS gene 68330 73621 1 - . ID=g1903;
chr1 AUGUSTUS mRNA 68330 73621 1 - . ID=g1903.t1;Parent=g1903;
chr1 Pfam protein_match 73372 73618 1 - . ID=g1903.t1.d1;Parent=g1903.t1;
chr1 Pfam protein_hmm_match 73372 73618 1 - . ID=g1903.t1.d1.1;Parent=g1903.t1.d1;
- """),
- expected_keys = [
- ['ID'],
- ['ID', 'Parent'],
- ['ID', 'Parent'],
- ['ID', 'Parent'],
+ """
+ ),
+ expected_keys=[
+ ["ID"],
+ ["ID", "Parent"],
+ ["ID", "Parent"],
+ ["ID", "Parent"],
],
- dialect_trailing_semicolon=True
+ dialect_trailing_semicolon=True,
)
# First two lines have trailing semicolon. However, the heuristics of
# dialect selection, which favor attributes with more values (assuming more
# information), decides that this file does NOT have trailing semicolons.
_check(
- txt=dedent("""\
+ txt=dedent(
+ """\
chr1 AUGUSTUS gene 68330 73621 1 - . ID=g1903;
chr1 AUGUSTUS mRNA 68330 73621 1 - . ID=g1903.t1;Parent=g1903;
chr1 Pfam protein_match 73372 73618 1 - . ID=g1903.t1.d1;Parent=g1903.t1
chr1 Pfam protein_hmm_match 73372 73618 1 - . ID=g1903.t1.d1.1;Parent=g1903.t1.d1
- """),
- expected_keys = [
- ['ID', ''],
- ['ID', 'Parent', ''],
- ['ID', 'Parent'],
- ['ID', 'Parent'],
+ """
+ ),
+ expected_keys=[
+ ["ID", ""],
+ ["ID", "Parent", ""],
+ ["ID", "Parent"],
+ ["ID", "Parent"],
],
dialect_trailing_semicolon=False,
)
@@ -514,18 +527,15 @@ def test_issue_207():
# as above to give higher weight, and to break the tie between with and
# without trailing semicolon, falls back to first dialect observed.
_check(
- txt=dedent("""\
+ txt=dedent(
+ """\
chr1 AUGUSTUS gene 68330 73621 1 - . ID=g1903;
chr1 AUGUSTUS mRNA 68330 73621 1 - . ID=g1903.t1;
chr1 Pfam protein_match 73372 73618 1 - . ID=g1903.t1.d1
chr1 Pfam protein_hmm_match 73372 73618 1 - . ID=g1903.t1.d1.1
- """),
- expected_keys=[
- ['ID'],
- ['ID'],
- ['ID'],
- ['ID']
- ],
+ """
+ ),
+ expected_keys=[["ID"], ["ID"], ["ID"], ["ID"]],
dialect_trailing_semicolon=True,
)
@@ -533,39 +543,37 @@ def test_issue_207():
# semicolon by giving one more line as evidence. The only difference from
# above is the last line.
_check(
- txt=dedent("""\
+ txt=dedent(
+ """\
chr1 AUGUSTUS gene 68330 73621 1 - . ID=g1903;
chr1 AUGUSTUS mRNA 68330 73621 1 - . ID=g1903.t1;
chr1 Pfam protein_match 73372 73618 1 - . ID=g1903.t1.d1
chr1 Pfam protein_hmm_match 73372 73618 1 - . ID=g1903.t1.d1.1
chr1 Pfam protein_hmm_match 73372 73618 1 - . ID=g1904.t1.d1.1
- """),
+ """
+ ),
expected_keys=[
- ['ID', ''],
- ['ID', ''],
- ['ID'],
- ['ID'],
- ['ID'],
+ ["ID", ""],
+ ["ID", ""],
+ ["ID"],
+ ["ID"],
+ ["ID"],
],
dialect_trailing_semicolon=False,
)
-
# Again seems inconsistent at first, but heuristics break ties by
# preferring first dialect, which here is no trailing semicolon.
_check(
- txt=dedent("""\
+ txt=dedent(
+ """\
chr1 AUGUSTUS gene 68330 73621 1 - . ID=g1903
chr1 AUGUSTUS mRNA 68330 73621 1 - . ID=g1903.t1
chr1 Pfam protein_match 73372 73618 1 - . ID=g1903.t1.d1;
chr1 Pfam protein_hmm_match 73372 73618 1 - . ID=g1903.t1.d1.1;
- """),
- expected_keys=[
- ['ID'],
- ['ID'],
- ['ID', ''],
- ['ID', '']
- ],
+ """
+ ),
+ expected_keys=[["ID"], ["ID"], ["ID", ""], ["ID", ""]],
dialect_trailing_semicolon=False,
)
@@ -587,7 +595,6 @@ def test_issue_213():
it = gffutils.iterators.DataIterator(data, from_string=True)
assert it.directives == ["gff-version 3"]
-
# Ensure they're parsed into the db from a string
db = gffutils.create_db(data, dbfn=":memory:", from_string=True, verbose=False)
assert db.directives == ["gff-version 3"], db.directives
@@ -602,6 +609,6 @@ def test_issue_213():
# Ensure they're parsed into the db from a file, and going to a file (to
# exactly replicate example in #213)
- db = gffutils.create_db(tmp, dbfn='issue_213.db', force=True)
+ db = gffutils.create_db(tmp, dbfn="issue_213.db", force=True)
assert db.directives == ["gff-version 3"], db.directives
assert len(db.directives) == 1
=====================================
gffutils/version.py
=====================================
@@ -1 +1 @@
-version = "0.12"
+version = "0.13"
=====================================
requirements.txt
=====================================
@@ -1,5 +1,4 @@
pyfaidx>=0.5.5.2
-six>=1.12.0
argh>=0.26.2
argcomplete>=1.9.4
simplejson
View it on GitLab: https://salsa.debian.org/med-team/python-gffutils/-/commit/26f906cabac3248bc95d36bc6b7923be24ead993
--
View it on GitLab: https://salsa.debian.org/med-team/python-gffutils/-/commit/26f906cabac3248bc95d36bc6b7923be24ead993
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20240416/fd20dda6/attachment-0001.htm>
More information about the debian-med-commit
mailing list