[med-svn] [Git][med-team/python-pyfaidx][upstream] New upstream version 0.7.0

Andreas Tille (@tille) gitlab at salsa.debian.org
Wed Jul 27 14:25:46 BST 2022



Andreas Tille pushed to branch upstream at Debian Med / python-pyfaidx


Commits:
1e05ce51 by Andreas Tille at 2022-07-14T16:43:48+02:00
New upstream version 0.7.0
- - - - -


14 changed files:

- README.rst
- dev-requirements.txt
- pyfaidx/__init__.py
- pyfaidx/cli.py
- tests/test_Fasta_bgzip.py
- tests/test_faidx.py
- tests/test_feature_bounds_check.py
- tests/test_feature_indexing.py
- tests/test_feature_key_function.py
- tests/test_feature_read_ahead_buffer.py
- tests/test_feature_sequence_as_raw.py
- tests/test_feature_split_char.py
- + tests/test_fsspec.py
- tests/test_sequence_class.py


Changes:

=====================================
README.rst
=====================================
@@ -336,6 +336,18 @@ The FastaVariant class provides a way to integrate single nucleotide variant cal
     >>> consensus['22'].variant_sites
     (16042793, 29187373, 29187448, 29194610, 29821332)
 
+Accessing fasta files from `filesystem_spec <https://filesystem-spec.readthedocs.io>`_ filesystems:
+
+.. code:: python
+
+    # new in v0.7.0
+    # pip install fsspec s3fs
+    >>> import fsspec
+    >>> from pyfaidx import Fasta
+    >>> of = fsspec.open("s3://broad-references/hg19/v0/Homo_sapiens_assembly19.fasta", anon=True)
+    >>> genes = Fasta(of)
+
+
 .. _faidx:
 
 It also provides a command-line script:


=====================================
dev-requirements.txt
=====================================
@@ -8,6 +8,7 @@ pysam
 requests 
 coverage 
 pyfasta 
-pyvcf 
+pyvcf3
 numpy 
 biopython
+fsspec


=====================================
pyfaidx/__init__.py
=====================================
@@ -5,6 +5,7 @@ Fasta file -> Faidx -> Fasta -> FastaRecord -> Sequence
 
 from __future__ import division
 
+import datetime
 import os
 import re
 import string
@@ -15,6 +16,7 @@ from collections import namedtuple
 from itertools import islice
 from math import ceil
 from os.path import getmtime
+from tempfile import TemporaryFile
 from threading import Lock
 from pkg_resources import get_distribution
 
@@ -25,7 +27,12 @@ try:
     from collections import OrderedDict
 except ImportError:  #python 2.6
     from ordereddict import OrderedDict
-    
+
+try:
+    import fsspec
+except ImportError:
+    fsspec = None
+
 __version__ = get_distribution("pyfaidx").version
 
 if sys.version_info > (3, ):
@@ -322,6 +329,7 @@ class Faidx(object):
 
     def __init__(self,
                  filename,
+                 indexname=None,
                  default_seq=None,
                  key_function=lambda x: x,
                  as_raw=False,
@@ -337,17 +345,52 @@ class Faidx(object):
                  rebuild=True,
                  build_index=True):
         """
-        filename: name of fasta file
+        filename: name of fasta file or fsspec.core.OpenFile instance
+        indexname: name of index file or fsspec.core.OpenFile instance
         key_function: optional callback function which should return a unique
           key for the self.index dictionary when given rname.
         as_raw: optional parameter to specify whether to return sequences as a
           Sequence() object or as a raw string.
           Default: False (i.e. return a Sequence() object).
         """
-        self.filename = filename
+        
+        if fsspec and isinstance(filename, fsspec.core.OpenFile):
+            self.filename = filename.path
+            assert getattr(filename, 'mode', 'rb') == 'rb'
+            assert getattr(filename, 'compression', None) is None  # restriction could potentially be lifted for BGZF
+            try:
+                self.file = filename.open()
+            except IOError:
+                raise FastaNotFoundError("Cannot read FASTA from OpenFile %s" % filename)
+            self._fs = filename.fs
+
+        elif isinstance(filename, str) or hasattr(filename, '__fspath__'):
+            self.filename = str(filename)
+            try:
+                self.file = open(filename, 'r+b' if mutable else 'rb')
+            except IOError:
+                raise FastaNotFoundError("Cannot read FASTA from file %s" % filename)
+            self._fs = None
 
-        if filename.lower().endswith('.bgz') or filename.lower().endswith(
-                '.gz'):
+        else:
+            raise TypeError("filename expected str, os.PathLike or fsspec.OpenFile, got: %r" % filename)
+
+        if fsspec and isinstance(indexname, fsspec.core.OpenFile):
+            self.indexname = indexname.path
+            self._fai_fs = indexname.fs
+            
+        elif isinstance(indexname, str) or hasattr(indexname, '__fspath__'):
+            self.indexname = str(indexname)
+            self._fai_fs = None 
+            
+        elif indexname is None:
+            self.indexname = self.filename + '.fai'
+            self._fai_fs = self._fs
+            
+        else:
+            raise TypeError("indexname expected NoneType, str, os.PathLike or fsspec.OpenFile, got: %r" % indexname)
+        
+        if self.filename.lower().endswith(('.bgz', '.gz')):
             # Only try to import Bio if we actually need the bgzf reader.
             try:
                 from Bio import bgzf
@@ -359,31 +402,22 @@ class Faidx(object):
                 raise ImportError(
                     "BioPython >= 1.73 must be installed to read block gzip files.")
             else:
-                self._fasta_opener = bgzf.open
                 self._bgzf = True
-        elif filename.lower().endswith('.bz2') or filename.lower().endswith(
-                '.zip'):
+                try:
+                    # mutable mode is not supported for bgzf anyways
+                    self.file = bgzf.BgzfReader(fileobj=self.file, mode="b")
+                except (ValueError, IOError):
+                    raise UnsupportedCompressionFormat(
+                        "Compressed FASTA is only supported in BGZF format. Use "
+                        "the samtools bgzip utility (instead of gzip) to "
+                        "compress your FASTA."
+                    )
+        elif self.filename.lower().endswith(('.bz2', '.zip')):
             raise UnsupportedCompressionFormat(
                 "Compressed FASTA is only supported in BGZF format. Use "
                 "bgzip to compresss your FASTA.")
         else:
-            self._fasta_opener = open
             self._bgzf = False
-
-        try:
-            self.file = self._fasta_opener(filename, 'r+b'
-                                           if mutable else 'rb')
-        except (ValueError, IOError) as e:
-            if str(e).find('BGZF') > -1:
-                raise UnsupportedCompressionFormat(
-                    "Compressed FASTA is only supported in BGZF format. Use "
-                    "the samtools bgzip utility (instead of gzip) to "
-                    "compress your FASTA.")
-            else:
-                raise FastaNotFoundError(
-                    "Cannot read FASTA file %s" % filename)
-
-        self.indexname = filename + '.fai'
         self.read_long_names = read_long_names
         self.key_function = key_function
         try:
@@ -424,32 +458,46 @@ class Faidx(object):
 
         self.mutable = mutable
         with self.lock:  # lock around index generation so only one thread calls method
-            try:
-                if os.path.exists(self.indexname) and getmtime(
-                        self.indexname) >= getmtime(self.filename):
-                    self.read_fai()
-                elif os.path.exists(self.indexname) and getmtime(
-                        self.indexname) < getmtime(
-                            self.filename) and not rebuild:
-                    self.read_fai()
-                    warnings.warn(
-                        "Index file {0} is older than FASTA file {1}.".format(
-                            self.indexname, self.filename), RuntimeWarning)
-                elif build_index:
-                    self.build_index()
-                    self.read_fai()
+
+            if self._fai_fs is None:
+                index_exists = os.path.exists(self.indexname)
+            else:
+                index_exists = self._fai_fs.exists(self.indexname)
+
+            if index_exists:
+                f_mtime = getmtime_fsspec(self.filename, self._fs)
+                i_mtime = getmtime_fsspec(self.indexname, self._fai_fs)
+                if f_mtime is None or i_mtime is None:
+                    warnings.warn("for fsspec: %s assuming index is current" % type(self._fs).__name__)
+                    index_is_stale = False
                 else:
-                    self.read_fai()
+                    index_is_stale = f_mtime > i_mtime
+            else:
+                index_is_stale = False
 
-            except FastaIndexingError:
-                self.file.close()
-                os.remove(self.indexname + '.tmp')
-                raise
+            if (
+                build_index
+                and (not index_exists or (index_is_stale and rebuild))
+            ):
+                try:
+                    self.build_index()
+                except FastaIndexingError:
+                    self.file.close()
+                    raise
+
+            try:
+                self.read_fai()
             except Exception:
-                # Handle potential exceptions other than 'FastaIndexingError'
                 self.file.close()
                 raise
 
+            if index_is_stale and not rebuild:
+                warnings.warn(
+                    "Index file {0} is older than FASTA file {1}.".format(
+                        self.indexname, self.filename
+                    ), RuntimeWarning
+                )
+
     def __contains__(self, region):
         if not self.buffer['name']:
             return False
@@ -469,7 +517,7 @@ class Faidx(object):
 
     def read_fai(self):
         try:
-            with open(self.indexname) as index:
+            with self._open_fai(mode='r') as index:
                 prev_bend = 0
                 drop_keys = []
                 for line in index:
@@ -518,9 +566,10 @@ class Faidx(object):
                 "Could not read index file %s" % self.indexname)
 
     def build_index(self):
+        assert self.file.tell() == 0
         try:
-            with self._fasta_opener(self.filename, 'rb') as fastafile:
-                with open(self.indexname + '.tmp', 'w') as indexfile:
+            with Rewind(self.file) as fastafile:
+                with TemporaryFile(mode='w+') as indexfile:
                     rname = None  # reference sequence name
                     offset = 0  # binary offset of end of current line
                     rlen = 0  # reference character length
@@ -601,7 +650,10 @@ class Faidx(object):
                                 "Inconsistent line found in >{0} at "
                                 "line {1:n}.".format(rname,
                                                      bad_lines[0][0] + 1))
-            shutil.move(self.indexname + '.tmp', self.indexname)
+
+                    indexfile.seek(0)
+                    with self._open_fai(mode='w') as target:
+                        shutil.copyfileobj(indexfile, target)
         except (IOError, FastaIndexingError) as e:
             if isinstance(e, IOError):
                 raise IOError(
@@ -612,10 +664,16 @@ class Faidx(object):
 
     def write_fai(self):
         with self.lock:
-            with open(self.indexname, 'w') as outfile:
+            with self._open_fai(mode='w') as outfile:
                 for line in self._index_as_string():
                     outfile.write(line)
 
+    def _open_fai(self, mode):
+        if self._fai_fs:
+            return self._fai_fs.open(self.indexname, mode=mode)
+        else:
+            return open(self.indexname, mode=mode)
+
     def from_buffer(self, start, end):
         i_start = start - self.buffer['start']  # want [0, 1) coordinates from [1, 1] coordinates
         i_end = end - self.buffer['start'] + 1
@@ -732,6 +790,10 @@ class Faidx(object):
             raise IOError(
                 "Write attempted for immutable Faidx instance. Set mutable=True to modify original FASTA."
             )
+        elif self.mutable and self._fs:
+            raise NotImplementedError(
+                "Writing to mutable instances is not implemented for fsspec objects."
+            )
         file_seq, internals = self.from_file(rname, start, end, internals=True)
 
         with self.lock:
@@ -944,7 +1006,7 @@ class FastaRecord(object):
 class MutableFastaRecord(FastaRecord):
     def __init__(self, name, fa):
         super(MutableFastaRecord, self).__init__(name, fa)
-        if self._fa.faidx._fasta_opener != open:
+        if self._fa.faidx._bgzf:
             raise UnsupportedCompressionFormat(
                 "BGZF compressed FASTA is not supported for MutableFastaRecord. "
                 "Please decompress your FASTA file.")
@@ -979,6 +1041,7 @@ class MutableFastaRecord(FastaRecord):
 class Fasta(object):
     def __init__(self,
                  filename,
+                 indexname=None,
                  default_seq=None,
                  key_function=lambda x: x,
                  as_raw=False,
@@ -995,12 +1058,14 @@ class Fasta(object):
                  build_index=True):
         """
         An object that provides a pygr compatible interface.
-        filename: name of fasta file
+        filename:  name of fasta file or fsspec.core.OpenFile instance
+        indexname: name of index file or fsspec.core.OpenFile instance
         """
         self.filename = filename
         self.mutable = mutable
         self.faidx = Faidx(
             filename,
+            indexname=indexname,
             key_function=key_function,
             as_raw=as_raw,
             default_seq=default_seq,
@@ -1117,7 +1182,7 @@ class FastaVariant(Fasta):
         try:
             import vcf
         except ImportError:
-            raise ImportError("PyVCF must be installed for FastaVariant.")
+            raise ImportError("PyVCF3 must be installed for FastaVariant.")
         if call_filter is not None:
             try:
                 key, expr, value = call_filter.split()  # 'GQ > 30'
@@ -1201,6 +1266,53 @@ def wrap_sequence(n, sequence, fillvalue=''):
         yield ''.join(line + ("\n", ))
 
 
+class Rewind:
+    """
+    use a fileobject in a context manager and rewind it back to its original position
+    """
+    def __init__(self, fileobj):
+        self.fileobj = fileobj
+        self.origin = None
+
+    def __enter__(self):
+        self.origin = self.fileobj.tell()
+        return self.fileobj
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.fileobj.seek(self.origin)
+        self.origin = None
+
+
+def getmtime_fsspec(path, fs):
+    """get the modification time of a file in a fsspec compatible way"""
+    if fs is None:
+        mtime = getmtime(path)
+    else:
+        # getting mtime for different fsspec filesystems is currently
+        # not well abstracted and leaks implementation details of
+        # different filesystems.
+        # See: https://github.com/fsspec/filesystem_spec/issues/526
+        f_info = fs.stat(path)
+        if 'mtime' in f_info:
+            mtime = f_info['mtime']
+        elif 'LastModified' in f_info:
+            mtime = f_info['LastModified']
+        elif 'updated' in f_info:
+            mtime = f_info['updated']
+        elif 'created' in f_info:
+            mtime = f_info['created']
+        else:
+            return None
+    if isinstance(mtime, float):
+        return mtime
+    elif isinstance(mtime, str):
+        return datetime.datetime.fromisoformat(mtime.replace("Z", "+00:00")).timestamp()
+    elif isinstance(mtime, datetime.datetime):
+        return mtime.timestamp()
+    else:
+        return None
+
+
 # To take a complement, we map each character in the first string in this pair
 # to the corresponding character in the second string.
 complement_map = ('ACTGNactgnYRWSKMDVHBXyrwskmdvhbx',
@@ -1245,6 +1357,8 @@ def translate_chr_name(from_name, to_name):
 
 
 def bed_split(bed_entry):
+    if bed_entry[0] == "#":
+        return (None, None, None)
     try:
         rname, start, end = bed_entry.rstrip().split()[:3]
     except (IndexError, ValueError):


=====================================
pyfaidx/cli.py
=====================================
@@ -25,6 +25,9 @@ def write_sequence(args):
     header = False
     for region in regions_to_fetch:
         name, start, end = split_function(region)
+        # allow the split_function to return None to signify input we should skip
+        if name == None:
+            continue
         if args.size_range:
             if start is not None and end is not None:
                 sequence_len = end - start


=====================================
tests/test_Fasta_bgzip.py
=====================================
@@ -70,9 +70,9 @@ def test_line_len(remove_index):
     for record in fasta:
         assert len(next(iter(record))) == fasta.faidx.index[record.name].lenc
 
-@pytest.mark.xfail(raises=UnsupportedCompressionFormat)
 def test_mutable_bgzf(remove_index):
-    fasta = Fasta('data/genes.fasta.gz', mutable=True)
+    with pytest.raises(UnsupportedCompressionFormat):
+        fasta = Fasta('data/genes.fasta.gz', mutable=True)
 
 @pytest.mark.xfail(raises=NotImplementedError)
 def test_long_names(remove_index):
@@ -120,15 +120,14 @@ def test_fetch_end(remove_index):
                          480, 481)
     assert str(result) == expect
 
-@pytest.mark.xfail(raises=FetchError)
 def test_fetch_border(remove_index):
     """ Fetch past the end of a gene entry """
     faidx = Faidx('data/genes.fasta.gz')
     expect = 'TC'
-    result = faidx.fetch('gi|557361099|gb|KF435150.1|',
-                         480, 500)
-    print(result)
-    assert str(result) == expect
+    with pytest.raises(FetchError):
+        result = faidx.fetch('gi|557361099|gb|KF435150.1|', 480, 500)
+        print(result)
+        assert str(result) == expect
 
 def test_rev(remove_index):
     faidx = Faidx('data/genes.fasta.gz')
@@ -137,33 +136,29 @@ def test_rev(remove_index):
                          480, 481)
     assert str(-result) == expect, result
 
-@pytest.mark.xfail(raises=FetchError)
 def test_fetch_past_bounds(remove_index):
     """ Fetch past the end of a gene entry """
     faidx = Faidx('data/genes.fasta.gz', strict_bounds=True)
-    result = faidx.fetch('gi|557361099|gb|KF435150.1|',
-                                     480, 5000)
+    with pytest.raises(FetchError):
+        result = faidx.fetch('gi|557361099|gb|KF435150.1|', 480, 5000)
 
-@pytest.mark.xfail(raises=FetchError)
 def test_fetch_negative(remove_index):
     """ Fetch starting with a negative coordinate """
     faidx = Faidx('data/genes.fasta.gz', strict_bounds=True)
-    result = faidx.fetch('gi|557361099|gb|KF435150.1|',
-                                     -10, 10)
+    with pytest.raises(FetchError):
+        result = faidx.fetch('gi|557361099|gb|KF435150.1|', -10, 10)
 
-@pytest.mark.xfail(raises=FetchError)
 def test_fetch_reversed_coordinates(remove_index):
     """ Fetch starting with a negative coordinate """
     faidx = Faidx('data/genes.fasta.gz', strict_bounds=True)
-    result = faidx.fetch('gi|557361099|gb|KF435150.1|',
-                                     50, 10)
+    with pytest.raises(FetchError):
+        result = faidx.fetch('gi|557361099|gb|KF435150.1|', 50, 10)
 
-@pytest.mark.xfail(raises=FetchError)
 def test_fetch_keyerror(remove_index):
     """ Fetch a key that does not exist """
     faidx = Faidx('data/genes.fasta.gz', strict_bounds=True)
-    result = faidx.fetch('gi|joe|gb|KF435150.1|',
-                                     1, 10)
+    with pytest.raises(FetchError):
+        result = faidx.fetch('gi|joe|gb|KF435150.1|', 1, 10)
 
 def test_blank_string(remove_index):
     """ seq[0:0] should return a blank string mdshw5/pyfaidx#53 """
@@ -230,12 +225,12 @@ def test_issue_79_fix_one_based_false_negate(remove_index):
     print(s.__dict__)
     assert (105, 100) == (s.start, s.end)
 
-@pytest.mark.xfail(raises=FetchError)
 def test_fetch_border_padded(remove_index):
     """ Fetch past the end of a gene entry """
-    faidx = Faidx('data/genes.fasta.gz', default_seq='N')
-    expect = 'TCNNNNNNNNNNNNNNNNNNN'
-    result = faidx.fetch('gi|557361099|gb|KF435150.1|',
-                         480, 500)
-    print(result)
-    assert str(result) == expect
+    with pytest.raises(FetchError):
+        faidx = Faidx('data/genes.fasta.gz', default_seq='N')
+        expect = 'TCNNNNNNNNNNNNNNNNNNN'
+        result = faidx.fetch('gi|557361099|gb|KF435150.1|',
+                             480, 500)
+        print(result)
+        assert str(result) == expect


=====================================
tests/test_faidx.py
=====================================
@@ -16,9 +16,9 @@ def remove_index():
     except EnvironmentError:
         pass  # some tests may delete this file
 
-@pytest.mark.xfail(raises=BedError)
 def test_short_line_lengths(remove_index):
-    main(['data/genes.fasta', '--bed', 'data/malformed.bed'])
+    with pytest.raises(BedError):
+        main(['data/genes.fasta', '--bed', 'data/malformed.bed'])
 
 def test_fetch_whole_file(remove_index):
     main(['data/genes.fasta'])
@@ -28,9 +28,9 @@ def test_split_entry(remove_index):
     assert os.path.exists('gi557361099gbKF435150.1.fasta')
     os.remove('gi557361099gbKF435150.1.fasta')
 
-@pytest.mark.xfail(raises=FetchError)
 def test_fetch_error(remove_index):
-    main(['data/genes.fasta', 'gi|557361099|gb|KF435150.1|:1-1000'])
+    with pytest.raises(FetchError):
+        main(['data/genes.fasta', 'gi|557361099|gb|KF435150.1|:1-1000'])
     
 def test_key_warning(remove_index):
     main(['data/genes.fasta', 'foo'])


=====================================
tests/test_feature_bounds_check.py
=====================================
@@ -93,33 +93,29 @@ def test_rev(remove_index):
                          480, 481)
     assert str(-result) == expect, result
 
-@pytest.mark.xfail(raises=FetchError)
 def test_fetch_past_bounds(remove_index):
     """ Fetch past the end of a gene entry """
     faidx = Faidx('data/genes.fasta', strict_bounds=True)
-    result = faidx.fetch('gi|557361099|gb|KF435150.1|',
-                                     480, 5000)
+    with pytest.raises(FetchError):
+        result = faidx.fetch('gi|557361099|gb|KF435150.1|', 480, 5000)
 
-@pytest.mark.xfail(raises=FetchError)
 def test_fetch_negative(remove_index):
     """ Fetch starting with a negative coordinate """
     faidx = Faidx('data/genes.fasta', strict_bounds=True)
-    result = faidx.fetch('gi|557361099|gb|KF435150.1|',
-                                     -10, 10)
+    with pytest.raises(FetchError):
+        result = faidx.fetch('gi|557361099|gb|KF435150.1|', -10, 10)
 
-@pytest.mark.xfail(raises=FetchError)
 def test_fetch_reversed_coordinates(remove_index):
     """ Fetch starting with a negative coordinate """
     faidx = Faidx('data/genes.fasta', strict_bounds=True)
-    result = faidx.fetch('gi|557361099|gb|KF435150.1|',
-                                     50, 10)
+    with pytest.raises(FetchError):
+        result = faidx.fetch('gi|557361099|gb|KF435150.1|', 50, 10)
 
-@pytest.mark.xfail(raises=FetchError)
 def test_fetch_keyerror(remove_index):
     """ Fetch a key that does not exist """
     faidx = Faidx('data/genes.fasta', strict_bounds=True)
-    result = faidx.fetch('gi|joe|gb|KF435150.1|',
-                                     1, 10)
+    with pytest.raises(FetchError):
+        result = faidx.fetch('gi|joe|gb|KF435150.1|', 1, 10)
 
 def test_blank_string(remove_index):
     """ seq[0:0] should return a blank string mdshw5/pyfaidx#53 """


=====================================
tests/test_feature_indexing.py
=====================================
@@ -331,13 +331,12 @@ def test_read_back_index(remove_index):
     finally:
         locale.setlocale(locale.LC_NUMERIC, old_locale)
 
-@pytest.mark.xfail(raises=IndexNotFoundError)
 def test_issue_134_no_build_index(remove_index):
     """ Ensure that index file is not built when build_index=False. See mdshw5/pyfaidx#134.
     """
-    faidx = Faidx('data/genes.fasta', build_index=False)
+    with pytest.raises(IndexNotFoundError):
+        faidx = Faidx('data/genes.fasta', build_index=False)
 
-@pytest.mark.xfail(raises=FastaIndexingError)
 def test_issue_144_no_defline(remove_index):
     """ Ensure that an exception is raised when a file contains no deflines. See mdshw5/pyfaidx#144.
     """
@@ -347,6 +346,7 @@ def test_issue_144_no_defline(remove_index):
         # Write simple fasta file
         with open(fasta_path, 'w') as fasta_out:
             fasta_out.write("CTCCGGGCCCAT\nATAAAGCCTAAA\n")
-        faidx = Faidx(fasta_path)
+        with pytest.raises(FastaIndexingError):
+            faidx = Faidx(fasta_path)
     finally:
         shutil.rmtree(tmp_dir)
\ No newline at end of file


=====================================
tests/test_feature_key_function.py
=====================================
@@ -60,9 +60,9 @@ def test_key_function_by_fetch(remove_index):
                          100, 150)
     assert str(result) == expect
 
-@pytest.mark.xfail(raises=ValueError)
 def test_duplicated_keys(remove_index):
-    genes = Fasta('data/genes.fasta', key_function=get_duplicated_gene_name)
+    with pytest.raises(ValueError):
+        genes = Fasta('data/genes.fasta', key_function=get_duplicated_gene_name)
 
 def test_duplicated_keys_shortest(remove_index):
     genes = Fasta('data/genes.fasta', key_function=get_duplicated_gene_name, duplicate_action="shortest")


=====================================
tests/test_feature_read_ahead_buffer.py
=====================================
@@ -31,11 +31,11 @@ def test_buffer_exceed(remove_index):
     result = fasta['gi|557361099|gb|KF435150.1|'][0:400].seq.lower()
     assert result == expect
 
-@pytest.mark.xfail(raises=FetchError)
 def test_bounds_error(remove_index):
     fasta = Fasta('data/genes.fasta', read_ahead=300, strict_bounds=True)
-    result = fasta['gi|557361099|gb|KF435150.1|'][100-1:15000].seq.lower()
+    with pytest.raises(FetchError):
+        result = fasta['gi|557361099|gb|KF435150.1|'][100-1:15000].seq.lower()
 
-@pytest.mark.xfail(raises=ValueError)
 def test_buffer_value(remove_index):
-    Fasta('data/genes.fasta', read_ahead=0.5)
\ No newline at end of file
+    with pytest.raises(ValueError):
+        Fasta('data/genes.fasta', read_ahead=0.5)


=====================================
tests/test_feature_sequence_as_raw.py
=====================================
@@ -25,15 +25,15 @@ def test_as_raw_true(remove_index):
     result = fasta['gi|557361099|gb|KF435150.1|'][100-1:150].lower()
     assert result == expect
 
-@pytest.mark.xfail(raises=AttributeError)
 def test_as_raw_false_error(remove_index):
     fasta = Fasta('data/genes.fasta')
-    result = fasta['gi|557361099|gb|KF435150.1|'][100-1:150].lower()
+    with pytest.raises(AttributeError):
+        result = fasta['gi|557361099|gb|KF435150.1|'][100-1:150].lower()
 
-@pytest.mark.xfail(raises=AttributeError)
 def test_as_raw_true_error(remove_index):
     fasta = Fasta('data/genes.fasta', as_raw=True)
-    result = fasta['gi|557361099|gb|KF435150.1|'][100-1:150].seq.lower()
+    with pytest.raises(AttributeError):
+        result = fasta['gi|557361099|gb|KF435150.1|'][100-1:150].seq.lower()
 
 def test_as_raw_type_when_blen_lt_0(remove_index):
     fasta = Fasta('data/genes.fasta', as_raw=True)


=====================================
tests/test_feature_split_char.py
=====================================
@@ -33,6 +33,6 @@ def test_key_function_by_fetch(remove_index):
                          100, 150)
     assert str(result) == expect
 
-@pytest.mark.xfail(raises=ValueError)
 def test_stop(remove_index):
-    fasta = Fasta('data/genes.fasta', split_char='|')
+    with pytest.raises(ValueError):
+        fasta = Fasta('data/genes.fasta', split_char='|')


=====================================
tests/test_fsspec.py
=====================================
@@ -0,0 +1,56 @@
+import os
+
+import pytest
+
+from pyfaidx import Fasta
+
+try:
+    import fsspec
+    from fsspec.core import OpenFile
+except ImportError:
+    pytestmark = pytest.mark.skip
+
+
+@pytest.fixture(scope="function")
+def openfile_genes_fasta():
+    testdir = os.path.dirname(__file__)
+    genes_fasta = os.path.join(testdir, 'data', 'genes.fasta')
+
+    fs = fsspec.filesystem("memory")
+    with fs.open('genes.fasta', mode='wb') as f:
+        with open(genes_fasta, mode="rb") as g:
+            f.write(g.read())
+
+    try:
+        yield fsspec.open('memory://genes.fasta', mode='rb')
+    finally:
+        fs.rm("/**", recursive=True)
+        assert not fs.ls("/")
+
+
+def test_fsspec_fetch_whole_file(openfile_genes_fasta):
+    _ = Fasta(openfile_genes_fasta)
+
+
+def test_fsspec_default_index(openfile_genes_fasta):
+    _ = Fasta(openfile_genes_fasta)
+
+    fs = openfile_genes_fasta.fs
+    assert fs.isfile(openfile_genes_fasta.path + ".fai")
+    assert fs.size(openfile_genes_fasta.path + ".fai") > 0
+
+
+def test_fsspec_local_index(openfile_genes_fasta, tmp_path):
+    index = tmp_path.joinpath("my_local_index.fai")
+    _ = Fasta(openfile_genes_fasta, indexname=index)
+    assert index.is_file()
+    assert index.stat().st_size > 0
+
+
+def test_fsspec_remote_index(openfile_genes_fasta):
+    f_fai = fsspec.open("memory://some_other_index.fai")
+    _ = Fasta(openfile_genes_fasta, indexname=f_fai)
+
+    fs = f_fai.fs
+    assert fs.isfile("some_other_index.fai")
+    assert fs.size("some_other_index.fai") > 0


=====================================
tests/test_sequence_class.py
=====================================
@@ -19,9 +19,9 @@ def test_negate_metadata():
     seq_neg = -seq
     assert seq_neg.__repr__() == seq.complement[::-1].__repr__()
 
-@pytest.mark.xfail(raises=ValueError)
 def test_seq_invalid():
-    seq_invalid.complement()
+    with pytest.raises(ValueError):
+        seq_invalid.complement()
 
 def test_integer_index():
     assert seq[1].seq == 'T'
@@ -29,15 +29,15 @@ def test_integer_index():
 def test_slice_index():
     assert seq[0:10].seq == 'TTGAAGATTT'
 
-@pytest.mark.xfail(raises=ValueError)
 def test_comp_invalid():
-    complement(comp_invalid)
+    with pytest.raises(ValueError):
+        complement(comp_invalid)
 
-@pytest.mark.xfail(raises=ValueError)
 def test_check_coordinates():
     x = Sequence(name='gi|557361099|gb|KF435150.1|', seq='TTGAAGATTTTGCATGCAGCAGGTGCGCAAGGTGAAATGTTCACTGTTAAA',
                  start=100, end=110)
-    x[:]
+    with pytest.raises(ValueError):
+        _ = x[:]
 
 def test_comp_valid():
     assert complement(comp_valid).startswith("AACTTCTAAAnCG")



View it on GitLab: https://salsa.debian.org/med-team/python-pyfaidx/-/commit/1e05ce518423cfc1bd3db46f012f118fed1f02aa

-- 
View it on GitLab: https://salsa.debian.org/med-team/python-pyfaidx/-/commit/1e05ce518423cfc1bd3db46f012f118fed1f02aa
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20220727/6a55bda7/attachment-0001.htm>


More information about the debian-med-commit mailing list