[med-svn] [Git][med-team/python-pyfaidx][upstream] New upstream version 0.7.0
Andreas Tille (@tille)
gitlab at salsa.debian.org
Wed Jul 27 14:25:46 BST 2022
Andreas Tille pushed to branch upstream at Debian Med / python-pyfaidx
Commits:
1e05ce51 by Andreas Tille at 2022-07-14T16:43:48+02:00
New upstream version 0.7.0
- - - - -
14 changed files:
- README.rst
- dev-requirements.txt
- pyfaidx/__init__.py
- pyfaidx/cli.py
- tests/test_Fasta_bgzip.py
- tests/test_faidx.py
- tests/test_feature_bounds_check.py
- tests/test_feature_indexing.py
- tests/test_feature_key_function.py
- tests/test_feature_read_ahead_buffer.py
- tests/test_feature_sequence_as_raw.py
- tests/test_feature_split_char.py
- + tests/test_fsspec.py
- tests/test_sequence_class.py
Changes:
=====================================
README.rst
=====================================
@@ -336,6 +336,18 @@ The FastaVariant class provides a way to integrate single nucleotide variant cal
>>> consensus['22'].variant_sites
(16042793, 29187373, 29187448, 29194610, 29821332)
+Accessing fasta files from `filesystem_spec <https://filesystem-spec.readthedocs.io>`_ filesystems:
+
+.. code:: python
+
+ # new in v0.7.0
+ # pip install fsspec s3fs
+ >>> import fsspec
+ >>> from pyfaidx import Fasta
+ >>> of = fsspec.open("s3://broad-references/hg19/v0/Homo_sapiens_assembly19.fasta", anon=True)
+ >>> genes = Fasta(of)
+
+
.. _faidx:
It also provides a command-line script:
=====================================
dev-requirements.txt
=====================================
@@ -8,6 +8,7 @@ pysam
requests
coverage
pyfasta
-pyvcf
+pyvcf3
numpy
biopython
+fsspec
=====================================
pyfaidx/__init__.py
=====================================
@@ -5,6 +5,7 @@ Fasta file -> Faidx -> Fasta -> FastaRecord -> Sequence
from __future__ import division
+import datetime
import os
import re
import string
@@ -15,6 +16,7 @@ from collections import namedtuple
from itertools import islice
from math import ceil
from os.path import getmtime
+from tempfile import TemporaryFile
from threading import Lock
from pkg_resources import get_distribution
@@ -25,7 +27,12 @@ try:
from collections import OrderedDict
except ImportError: #python 2.6
from ordereddict import OrderedDict
-
+
+try:
+ import fsspec
+except ImportError:
+ fsspec = None
+
__version__ = get_distribution("pyfaidx").version
if sys.version_info > (3, ):
@@ -322,6 +329,7 @@ class Faidx(object):
def __init__(self,
filename,
+ indexname=None,
default_seq=None,
key_function=lambda x: x,
as_raw=False,
@@ -337,17 +345,52 @@ class Faidx(object):
rebuild=True,
build_index=True):
"""
- filename: name of fasta file
+ filename: name of fasta file or fsspec.core.OpenFile instance
+ indexname: name of index file or fsspec.core.OpenFile instance
key_function: optional callback function which should return a unique
key for the self.index dictionary when given rname.
as_raw: optional parameter to specify whether to return sequences as a
Sequence() object or as a raw string.
Default: False (i.e. return a Sequence() object).
"""
- self.filename = filename
+
+ if fsspec and isinstance(filename, fsspec.core.OpenFile):
+ self.filename = filename.path
+ assert getattr(filename, 'mode', 'rb') == 'rb'
+ assert getattr(filename, 'compression', None) is None # restriction could potentially be lifted for BGZF
+ try:
+ self.file = filename.open()
+ except IOError:
+ raise FastaNotFoundError("Cannot read FASTA from OpenFile %s" % filename)
+ self._fs = filename.fs
+
+ elif isinstance(filename, str) or hasattr(filename, '__fspath__'):
+ self.filename = str(filename)
+ try:
+ self.file = open(filename, 'r+b' if mutable else 'rb')
+ except IOError:
+ raise FastaNotFoundError("Cannot read FASTA from file %s" % filename)
+ self._fs = None
- if filename.lower().endswith('.bgz') or filename.lower().endswith(
- '.gz'):
+ else:
+ raise TypeError("filename expected str, os.PathLike or fsspec.OpenFile, got: %r" % filename)
+
+ if fsspec and isinstance(indexname, fsspec.core.OpenFile):
+ self.indexname = indexname.path
+ self._fai_fs = indexname.fs
+
+ elif isinstance(indexname, str) or hasattr(indexname, '__fspath__'):
+ self.indexname = str(indexname)
+ self._fai_fs = None
+
+ elif indexname is None:
+ self.indexname = self.filename + '.fai'
+ self._fai_fs = self._fs
+
+ else:
+ raise TypeError("indexname expected NoneType, str, os.PathLike or fsspec.OpenFile, got: %r" % indexname)
+
+ if self.filename.lower().endswith(('.bgz', '.gz')):
# Only try to import Bio if we actually need the bgzf reader.
try:
from Bio import bgzf
@@ -359,31 +402,22 @@ class Faidx(object):
raise ImportError(
"BioPython >= 1.73 must be installed to read block gzip files.")
else:
- self._fasta_opener = bgzf.open
self._bgzf = True
- elif filename.lower().endswith('.bz2') or filename.lower().endswith(
- '.zip'):
+ try:
+ # mutable mode is not supported for bgzf anyways
+ self.file = bgzf.BgzfReader(fileobj=self.file, mode="b")
+ except (ValueError, IOError):
+ raise UnsupportedCompressionFormat(
+ "Compressed FASTA is only supported in BGZF format. Use "
+ "the samtools bgzip utility (instead of gzip) to "
+ "compress your FASTA."
+ )
+ elif self.filename.lower().endswith(('.bz2', '.zip')):
raise UnsupportedCompressionFormat(
"Compressed FASTA is only supported in BGZF format. Use "
"bgzip to compresss your FASTA.")
else:
- self._fasta_opener = open
self._bgzf = False
-
- try:
- self.file = self._fasta_opener(filename, 'r+b'
- if mutable else 'rb')
- except (ValueError, IOError) as e:
- if str(e).find('BGZF') > -1:
- raise UnsupportedCompressionFormat(
- "Compressed FASTA is only supported in BGZF format. Use "
- "the samtools bgzip utility (instead of gzip) to "
- "compress your FASTA.")
- else:
- raise FastaNotFoundError(
- "Cannot read FASTA file %s" % filename)
-
- self.indexname = filename + '.fai'
self.read_long_names = read_long_names
self.key_function = key_function
try:
@@ -424,32 +458,46 @@ class Faidx(object):
self.mutable = mutable
with self.lock: # lock around index generation so only one thread calls method
- try:
- if os.path.exists(self.indexname) and getmtime(
- self.indexname) >= getmtime(self.filename):
- self.read_fai()
- elif os.path.exists(self.indexname) and getmtime(
- self.indexname) < getmtime(
- self.filename) and not rebuild:
- self.read_fai()
- warnings.warn(
- "Index file {0} is older than FASTA file {1}.".format(
- self.indexname, self.filename), RuntimeWarning)
- elif build_index:
- self.build_index()
- self.read_fai()
+
+ if self._fai_fs is None:
+ index_exists = os.path.exists(self.indexname)
+ else:
+ index_exists = self._fai_fs.exists(self.indexname)
+
+ if index_exists:
+ f_mtime = getmtime_fsspec(self.filename, self._fs)
+ i_mtime = getmtime_fsspec(self.indexname, self._fai_fs)
+ if f_mtime is None or i_mtime is None:
+ warnings.warn("for fsspec: %s assuming index is current" % type(self._fs).__name__)
+ index_is_stale = False
else:
- self.read_fai()
+ index_is_stale = f_mtime > i_mtime
+ else:
+ index_is_stale = False
- except FastaIndexingError:
- self.file.close()
- os.remove(self.indexname + '.tmp')
- raise
+ if (
+ build_index
+ and (not index_exists or (index_is_stale and rebuild))
+ ):
+ try:
+ self.build_index()
+ except FastaIndexingError:
+ self.file.close()
+ raise
+
+ try:
+ self.read_fai()
except Exception:
- # Handle potential exceptions other than 'FastaIndexingError'
self.file.close()
raise
+ if index_is_stale and not rebuild:
+ warnings.warn(
+ "Index file {0} is older than FASTA file {1}.".format(
+ self.indexname, self.filename
+ ), RuntimeWarning
+ )
+
def __contains__(self, region):
if not self.buffer['name']:
return False
@@ -469,7 +517,7 @@ class Faidx(object):
def read_fai(self):
try:
- with open(self.indexname) as index:
+ with self._open_fai(mode='r') as index:
prev_bend = 0
drop_keys = []
for line in index:
@@ -518,9 +566,10 @@ class Faidx(object):
"Could not read index file %s" % self.indexname)
def build_index(self):
+ assert self.file.tell() == 0
try:
- with self._fasta_opener(self.filename, 'rb') as fastafile:
- with open(self.indexname + '.tmp', 'w') as indexfile:
+ with Rewind(self.file) as fastafile:
+ with TemporaryFile(mode='w+') as indexfile:
rname = None # reference sequence name
offset = 0 # binary offset of end of current line
rlen = 0 # reference character length
@@ -601,7 +650,10 @@ class Faidx(object):
"Inconsistent line found in >{0} at "
"line {1:n}.".format(rname,
bad_lines[0][0] + 1))
- shutil.move(self.indexname + '.tmp', self.indexname)
+
+ indexfile.seek(0)
+ with self._open_fai(mode='w') as target:
+ shutil.copyfileobj(indexfile, target)
except (IOError, FastaIndexingError) as e:
if isinstance(e, IOError):
raise IOError(
@@ -612,10 +664,16 @@ class Faidx(object):
def write_fai(self):
with self.lock:
- with open(self.indexname, 'w') as outfile:
+ with self._open_fai(mode='w') as outfile:
for line in self._index_as_string():
outfile.write(line)
+ def _open_fai(self, mode):
+ if self._fai_fs:
+ return self._fai_fs.open(self.indexname, mode=mode)
+ else:
+ return open(self.indexname, mode=mode)
+
def from_buffer(self, start, end):
i_start = start - self.buffer['start'] # want [0, 1) coordinates from [1, 1] coordinates
i_end = end - self.buffer['start'] + 1
@@ -732,6 +790,10 @@ class Faidx(object):
raise IOError(
"Write attempted for immutable Faidx instance. Set mutable=True to modify original FASTA."
)
+ elif self.mutable and self._fs:
+ raise NotImplementedError(
+ "Writing to mutable instances is not implemented for fsspec objects."
+ )
file_seq, internals = self.from_file(rname, start, end, internals=True)
with self.lock:
@@ -944,7 +1006,7 @@ class FastaRecord(object):
class MutableFastaRecord(FastaRecord):
def __init__(self, name, fa):
super(MutableFastaRecord, self).__init__(name, fa)
- if self._fa.faidx._fasta_opener != open:
+ if self._fa.faidx._bgzf:
raise UnsupportedCompressionFormat(
"BGZF compressed FASTA is not supported for MutableFastaRecord. "
"Please decompress your FASTA file.")
@@ -979,6 +1041,7 @@ class MutableFastaRecord(FastaRecord):
class Fasta(object):
def __init__(self,
filename,
+ indexname=None,
default_seq=None,
key_function=lambda x: x,
as_raw=False,
@@ -995,12 +1058,14 @@ class Fasta(object):
build_index=True):
"""
An object that provides a pygr compatible interface.
- filename: name of fasta file
+ filename: name of fasta file or fsspec.core.OpenFile instance
+ indexname: name of index file or fsspec.core.OpenFile instance
"""
self.filename = filename
self.mutable = mutable
self.faidx = Faidx(
filename,
+ indexname=indexname,
key_function=key_function,
as_raw=as_raw,
default_seq=default_seq,
@@ -1117,7 +1182,7 @@ class FastaVariant(Fasta):
try:
import vcf
except ImportError:
- raise ImportError("PyVCF must be installed for FastaVariant.")
+ raise ImportError("PyVCF3 must be installed for FastaVariant.")
if call_filter is not None:
try:
key, expr, value = call_filter.split() # 'GQ > 30'
@@ -1201,6 +1266,53 @@ def wrap_sequence(n, sequence, fillvalue=''):
yield ''.join(line + ("\n", ))
+class Rewind:
+ """
+ use a fileobject in a context manager and rewind it back to its original position
+ """
+ def __init__(self, fileobj):
+ self.fileobj = fileobj
+ self.origin = None
+
+ def __enter__(self):
+ self.origin = self.fileobj.tell()
+ return self.fileobj
+
+ def __exit__(self, exc_type, exc_val, exc_tb):
+ self.fileobj.seek(self.origin)
+ self.origin = None
+
+
+def getmtime_fsspec(path, fs):
+ """get the modification time of a file in a fsspec compatible way"""
+ if fs is None:
+ mtime = getmtime(path)
+ else:
+ # getting mtime for different fsspec filesystems is currently
+ # not well abstracted and leaks implementation details of
+ # different filesystems.
+ # See: https://github.com/fsspec/filesystem_spec/issues/526
+ f_info = fs.stat(path)
+ if 'mtime' in f_info:
+ mtime = f_info['mtime']
+ elif 'LastModified' in f_info:
+ mtime = f_info['LastModified']
+ elif 'updated' in f_info:
+ mtime = f_info['updated']
+ elif 'created' in f_info:
+ mtime = f_info['created']
+ else:
+ return None
+ if isinstance(mtime, float):
+ return mtime
+ elif isinstance(mtime, str):
+ return datetime.datetime.fromisoformat(mtime.replace("Z", "+00:00")).timestamp()
+ elif isinstance(mtime, datetime.datetime):
+ return mtime.timestamp()
+ else:
+ return None
+
+
# To take a complement, we map each character in the first string in this pair
# to the corresponding character in the second string.
complement_map = ('ACTGNactgnYRWSKMDVHBXyrwskmdvhbx',
@@ -1245,6 +1357,8 @@ def translate_chr_name(from_name, to_name):
def bed_split(bed_entry):
+ if bed_entry[0] == "#":
+ return (None, None, None)
try:
rname, start, end = bed_entry.rstrip().split()[:3]
except (IndexError, ValueError):
=====================================
pyfaidx/cli.py
=====================================
@@ -25,6 +25,9 @@ def write_sequence(args):
header = False
for region in regions_to_fetch:
name, start, end = split_function(region)
+ # allow the split_function to return None to signify input we should skip
+ if name == None:
+ continue
if args.size_range:
if start is not None and end is not None:
sequence_len = end - start
=====================================
tests/test_Fasta_bgzip.py
=====================================
@@ -70,9 +70,9 @@ def test_line_len(remove_index):
for record in fasta:
assert len(next(iter(record))) == fasta.faidx.index[record.name].lenc
-@pytest.mark.xfail(raises=UnsupportedCompressionFormat)
def test_mutable_bgzf(remove_index):
- fasta = Fasta('data/genes.fasta.gz', mutable=True)
+ with pytest.raises(UnsupportedCompressionFormat):
+ fasta = Fasta('data/genes.fasta.gz', mutable=True)
@pytest.mark.xfail(raises=NotImplementedError)
def test_long_names(remove_index):
@@ -120,15 +120,14 @@ def test_fetch_end(remove_index):
480, 481)
assert str(result) == expect
-@pytest.mark.xfail(raises=FetchError)
def test_fetch_border(remove_index):
""" Fetch past the end of a gene entry """
faidx = Faidx('data/genes.fasta.gz')
expect = 'TC'
- result = faidx.fetch('gi|557361099|gb|KF435150.1|',
- 480, 500)
- print(result)
- assert str(result) == expect
+ with pytest.raises(FetchError):
+ result = faidx.fetch('gi|557361099|gb|KF435150.1|', 480, 500)
+ print(result)
+ assert str(result) == expect
def test_rev(remove_index):
faidx = Faidx('data/genes.fasta.gz')
@@ -137,33 +136,29 @@ def test_rev(remove_index):
480, 481)
assert str(-result) == expect, result
-@pytest.mark.xfail(raises=FetchError)
def test_fetch_past_bounds(remove_index):
""" Fetch past the end of a gene entry """
faidx = Faidx('data/genes.fasta.gz', strict_bounds=True)
- result = faidx.fetch('gi|557361099|gb|KF435150.1|',
- 480, 5000)
+ with pytest.raises(FetchError):
+ result = faidx.fetch('gi|557361099|gb|KF435150.1|', 480, 5000)
-@pytest.mark.xfail(raises=FetchError)
def test_fetch_negative(remove_index):
""" Fetch starting with a negative coordinate """
faidx = Faidx('data/genes.fasta.gz', strict_bounds=True)
- result = faidx.fetch('gi|557361099|gb|KF435150.1|',
- -10, 10)
+ with pytest.raises(FetchError):
+ result = faidx.fetch('gi|557361099|gb|KF435150.1|', -10, 10)
-@pytest.mark.xfail(raises=FetchError)
def test_fetch_reversed_coordinates(remove_index):
""" Fetch starting with a negative coordinate """
faidx = Faidx('data/genes.fasta.gz', strict_bounds=True)
- result = faidx.fetch('gi|557361099|gb|KF435150.1|',
- 50, 10)
+ with pytest.raises(FetchError):
+ result = faidx.fetch('gi|557361099|gb|KF435150.1|', 50, 10)
-@pytest.mark.xfail(raises=FetchError)
def test_fetch_keyerror(remove_index):
""" Fetch a key that does not exist """
faidx = Faidx('data/genes.fasta.gz', strict_bounds=True)
- result = faidx.fetch('gi|joe|gb|KF435150.1|',
- 1, 10)
+ with pytest.raises(FetchError):
+ result = faidx.fetch('gi|joe|gb|KF435150.1|', 1, 10)
def test_blank_string(remove_index):
""" seq[0:0] should return a blank string mdshw5/pyfaidx#53 """
@@ -230,12 +225,12 @@ def test_issue_79_fix_one_based_false_negate(remove_index):
print(s.__dict__)
assert (105, 100) == (s.start, s.end)
-@pytest.mark.xfail(raises=FetchError)
def test_fetch_border_padded(remove_index):
""" Fetch past the end of a gene entry """
- faidx = Faidx('data/genes.fasta.gz', default_seq='N')
- expect = 'TCNNNNNNNNNNNNNNNNNNN'
- result = faidx.fetch('gi|557361099|gb|KF435150.1|',
- 480, 500)
- print(result)
- assert str(result) == expect
+ with pytest.raises(FetchError):
+ faidx = Faidx('data/genes.fasta.gz', default_seq='N')
+ expect = 'TCNNNNNNNNNNNNNNNNNNN'
+ result = faidx.fetch('gi|557361099|gb|KF435150.1|',
+ 480, 500)
+ print(result)
+ assert str(result) == expect
=====================================
tests/test_faidx.py
=====================================
@@ -16,9 +16,9 @@ def remove_index():
except EnvironmentError:
pass # some tests may delete this file
-@pytest.mark.xfail(raises=BedError)
def test_short_line_lengths(remove_index):
- main(['data/genes.fasta', '--bed', 'data/malformed.bed'])
+ with pytest.raises(BedError):
+ main(['data/genes.fasta', '--bed', 'data/malformed.bed'])
def test_fetch_whole_file(remove_index):
main(['data/genes.fasta'])
@@ -28,9 +28,9 @@ def test_split_entry(remove_index):
assert os.path.exists('gi557361099gbKF435150.1.fasta')
os.remove('gi557361099gbKF435150.1.fasta')
-@pytest.mark.xfail(raises=FetchError)
def test_fetch_error(remove_index):
- main(['data/genes.fasta', 'gi|557361099|gb|KF435150.1|:1-1000'])
+ with pytest.raises(FetchError):
+ main(['data/genes.fasta', 'gi|557361099|gb|KF435150.1|:1-1000'])
def test_key_warning(remove_index):
main(['data/genes.fasta', 'foo'])
=====================================
tests/test_feature_bounds_check.py
=====================================
@@ -93,33 +93,29 @@ def test_rev(remove_index):
480, 481)
assert str(-result) == expect, result
-@pytest.mark.xfail(raises=FetchError)
def test_fetch_past_bounds(remove_index):
""" Fetch past the end of a gene entry """
faidx = Faidx('data/genes.fasta', strict_bounds=True)
- result = faidx.fetch('gi|557361099|gb|KF435150.1|',
- 480, 5000)
+ with pytest.raises(FetchError):
+ result = faidx.fetch('gi|557361099|gb|KF435150.1|', 480, 5000)
-@pytest.mark.xfail(raises=FetchError)
def test_fetch_negative(remove_index):
""" Fetch starting with a negative coordinate """
faidx = Faidx('data/genes.fasta', strict_bounds=True)
- result = faidx.fetch('gi|557361099|gb|KF435150.1|',
- -10, 10)
+ with pytest.raises(FetchError):
+ result = faidx.fetch('gi|557361099|gb|KF435150.1|', -10, 10)
-@pytest.mark.xfail(raises=FetchError)
def test_fetch_reversed_coordinates(remove_index):
""" Fetch starting with a negative coordinate """
faidx = Faidx('data/genes.fasta', strict_bounds=True)
- result = faidx.fetch('gi|557361099|gb|KF435150.1|',
- 50, 10)
+ with pytest.raises(FetchError):
+ result = faidx.fetch('gi|557361099|gb|KF435150.1|', 50, 10)
-@pytest.mark.xfail(raises=FetchError)
def test_fetch_keyerror(remove_index):
""" Fetch a key that does not exist """
faidx = Faidx('data/genes.fasta', strict_bounds=True)
- result = faidx.fetch('gi|joe|gb|KF435150.1|',
- 1, 10)
+ with pytest.raises(FetchError):
+ result = faidx.fetch('gi|joe|gb|KF435150.1|', 1, 10)
def test_blank_string(remove_index):
""" seq[0:0] should return a blank string mdshw5/pyfaidx#53 """
=====================================
tests/test_feature_indexing.py
=====================================
@@ -331,13 +331,12 @@ def test_read_back_index(remove_index):
finally:
locale.setlocale(locale.LC_NUMERIC, old_locale)
-@pytest.mark.xfail(raises=IndexNotFoundError)
def test_issue_134_no_build_index(remove_index):
""" Ensure that index file is not built when build_index=False. See mdshw5/pyfaidx#134.
"""
- faidx = Faidx('data/genes.fasta', build_index=False)
+ with pytest.raises(IndexNotFoundError):
+ faidx = Faidx('data/genes.fasta', build_index=False)
-@pytest.mark.xfail(raises=FastaIndexingError)
def test_issue_144_no_defline(remove_index):
""" Ensure that an exception is raised when a file contains no deflines. See mdshw5/pyfaidx#144.
"""
@@ -347,6 +346,7 @@ def test_issue_144_no_defline(remove_index):
# Write simple fasta file
with open(fasta_path, 'w') as fasta_out:
fasta_out.write("CTCCGGGCCCAT\nATAAAGCCTAAA\n")
- faidx = Faidx(fasta_path)
+ with pytest.raises(FastaIndexingError):
+ faidx = Faidx(fasta_path)
finally:
shutil.rmtree(tmp_dir)
\ No newline at end of file
=====================================
tests/test_feature_key_function.py
=====================================
@@ -60,9 +60,9 @@ def test_key_function_by_fetch(remove_index):
100, 150)
assert str(result) == expect
-@pytest.mark.xfail(raises=ValueError)
def test_duplicated_keys(remove_index):
- genes = Fasta('data/genes.fasta', key_function=get_duplicated_gene_name)
+ with pytest.raises(ValueError):
+ genes = Fasta('data/genes.fasta', key_function=get_duplicated_gene_name)
def test_duplicated_keys_shortest(remove_index):
genes = Fasta('data/genes.fasta', key_function=get_duplicated_gene_name, duplicate_action="shortest")
=====================================
tests/test_feature_read_ahead_buffer.py
=====================================
@@ -31,11 +31,11 @@ def test_buffer_exceed(remove_index):
result = fasta['gi|557361099|gb|KF435150.1|'][0:400].seq.lower()
assert result == expect
-@pytest.mark.xfail(raises=FetchError)
def test_bounds_error(remove_index):
fasta = Fasta('data/genes.fasta', read_ahead=300, strict_bounds=True)
- result = fasta['gi|557361099|gb|KF435150.1|'][100-1:15000].seq.lower()
+ with pytest.raises(FetchError):
+ result = fasta['gi|557361099|gb|KF435150.1|'][100-1:15000].seq.lower()
-@pytest.mark.xfail(raises=ValueError)
def test_buffer_value(remove_index):
- Fasta('data/genes.fasta', read_ahead=0.5)
\ No newline at end of file
+ with pytest.raises(ValueError):
+ Fasta('data/genes.fasta', read_ahead=0.5)
=====================================
tests/test_feature_sequence_as_raw.py
=====================================
@@ -25,15 +25,15 @@ def test_as_raw_true(remove_index):
result = fasta['gi|557361099|gb|KF435150.1|'][100-1:150].lower()
assert result == expect
-@pytest.mark.xfail(raises=AttributeError)
def test_as_raw_false_error(remove_index):
fasta = Fasta('data/genes.fasta')
- result = fasta['gi|557361099|gb|KF435150.1|'][100-1:150].lower()
+ with pytest.raises(AttributeError):
+ result = fasta['gi|557361099|gb|KF435150.1|'][100-1:150].lower()
-@pytest.mark.xfail(raises=AttributeError)
def test_as_raw_true_error(remove_index):
fasta = Fasta('data/genes.fasta', as_raw=True)
- result = fasta['gi|557361099|gb|KF435150.1|'][100-1:150].seq.lower()
+ with pytest.raises(AttributeError):
+ result = fasta['gi|557361099|gb|KF435150.1|'][100-1:150].seq.lower()
def test_as_raw_type_when_blen_lt_0(remove_index):
fasta = Fasta('data/genes.fasta', as_raw=True)
=====================================
tests/test_feature_split_char.py
=====================================
@@ -33,6 +33,6 @@ def test_key_function_by_fetch(remove_index):
100, 150)
assert str(result) == expect
-@pytest.mark.xfail(raises=ValueError)
def test_stop(remove_index):
- fasta = Fasta('data/genes.fasta', split_char='|')
+ with pytest.raises(ValueError):
+ fasta = Fasta('data/genes.fasta', split_char='|')
=====================================
tests/test_fsspec.py
=====================================
@@ -0,0 +1,56 @@
+import os
+
+import pytest
+
+from pyfaidx import Fasta
+
+try:
+ import fsspec
+ from fsspec.core import OpenFile
+except ImportError:
+ pytestmark = pytest.mark.skip
+
+
+@pytest.fixture(scope="function")
+def openfile_genes_fasta():
+ testdir = os.path.dirname(__file__)
+ genes_fasta = os.path.join(testdir, 'data', 'genes.fasta')
+
+ fs = fsspec.filesystem("memory")
+ with fs.open('genes.fasta', mode='wb') as f:
+ with open(genes_fasta, mode="rb") as g:
+ f.write(g.read())
+
+ try:
+ yield fsspec.open('memory://genes.fasta', mode='rb')
+ finally:
+ fs.rm("/**", recursive=True)
+ assert not fs.ls("/")
+
+
+def test_fsspec_fetch_whole_file(openfile_genes_fasta):
+ _ = Fasta(openfile_genes_fasta)
+
+
+def test_fsspec_default_index(openfile_genes_fasta):
+ _ = Fasta(openfile_genes_fasta)
+
+ fs = openfile_genes_fasta.fs
+ assert fs.isfile(openfile_genes_fasta.path + ".fai")
+ assert fs.size(openfile_genes_fasta.path + ".fai") > 0
+
+
+def test_fsspec_local_index(openfile_genes_fasta, tmp_path):
+ index = tmp_path.joinpath("my_local_index.fai")
+ _ = Fasta(openfile_genes_fasta, indexname=index)
+ assert index.is_file()
+ assert index.stat().st_size > 0
+
+
+def test_fsspec_remote_index(openfile_genes_fasta):
+ f_fai = fsspec.open("memory://some_other_index.fai")
+ _ = Fasta(openfile_genes_fasta, indexname=f_fai)
+
+ fs = f_fai.fs
+ assert fs.isfile("some_other_index.fai")
+ assert fs.size("some_other_index.fai") > 0
=====================================
tests/test_sequence_class.py
=====================================
@@ -19,9 +19,9 @@ def test_negate_metadata():
seq_neg = -seq
assert seq_neg.__repr__() == seq.complement[::-1].__repr__()
-@pytest.mark.xfail(raises=ValueError)
def test_seq_invalid():
- seq_invalid.complement()
+ with pytest.raises(ValueError):
+ seq_invalid.complement()
def test_integer_index():
assert seq[1].seq == 'T'
@@ -29,15 +29,15 @@ def test_integer_index():
def test_slice_index():
assert seq[0:10].seq == 'TTGAAGATTT'
-@pytest.mark.xfail(raises=ValueError)
def test_comp_invalid():
- complement(comp_invalid)
+ with pytest.raises(ValueError):
+ complement(comp_invalid)
-@pytest.mark.xfail(raises=ValueError)
def test_check_coordinates():
x = Sequence(name='gi|557361099|gb|KF435150.1|', seq='TTGAAGATTTTGCATGCAGCAGGTGCGCAAGGTGAAATGTTCACTGTTAAA',
start=100, end=110)
- x[:]
+ with pytest.raises(ValueError):
+ _ = x[:]
def test_comp_valid():
assert complement(comp_valid).startswith("AACTTCTAAAnCG")
View it on GitLab: https://salsa.debian.org/med-team/python-pyfaidx/-/commit/1e05ce518423cfc1bd3db46f012f118fed1f02aa
--
View it on GitLab: https://salsa.debian.org/med-team/python-pyfaidx/-/commit/1e05ce518423cfc1bd3db46f012f118fed1f02aa
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20220727/6a55bda7/attachment-0001.htm>
More information about the debian-med-commit
mailing list