[med-svn] [Git][med-team/python-dnaio][upstream] New upstream version 0.6.0
Nilesh Patra (@nilesh)
gitlab at salsa.debian.org
Sat Oct 2 23:30:42 BST 2021
Nilesh Patra pushed to branch upstream at Debian Med / python-dnaio
Commits:
00964d9f by Nilesh Patra at 2021-10-03T03:40:03+05:30
New upstream version 0.6.0
- - - - -
17 changed files:
- .github/workflows/ci.yml
- CHANGES.rst
- MANIFEST.in
- README.md
- pyproject.toml
- setup.py
- src/dnaio/__init__.py
- src/dnaio/_core.pyi
- src/dnaio/_core.pyx
- src/dnaio/_util.py
- src/dnaio/exceptions.py
- + src/dnaio/interfaces.py
- + src/dnaio/pairedend.py
- src/dnaio/readers.py
- + src/dnaio/singleend.py
- src/dnaio/writers.py
- tests/test_internal.py
Changes:
=====================================
.github/workflows/ci.yml
=====================================
@@ -26,7 +26,7 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
- python-version: [3.6, 3.7, 3.8, 3.9]
+ python-version: [3.6, 3.7, 3.8, 3.9, "3.10-dev"]
os: [ubuntu-latest]
include:
- python-version: 3.8
=====================================
CHANGES.rst
=====================================
@@ -2,6 +2,13 @@
Changelog
=========
+
+v0.6.0 (2021-09-28)
+-------------------
+
+* PR #12: Improve FASTQ writing speed twofold (thanks to @rhpvorderman)
+
+
v0.5.2 (2021-09-07)
-------------------
=====================================
MANIFEST.in
=====================================
@@ -1,5 +1,4 @@
include README.md
include LICENSE
include src/dnaio/*.c
-include versioneer.py
include src/dnaio/_version.py
=====================================
README.md
=====================================
@@ -34,7 +34,7 @@ The main interface is the `dnaio.open` function:
# Limitations
-- Multi-line FASTQ files are not supported. You shouldn’t use them anyway.
+- Multi-line FASTQ files are not supported.
- FASTQ parsing is the focus of this library. The FASTA parser is not as optimized.
=====================================
pyproject.toml
=====================================
@@ -1,2 +1,5 @@
[build-system]
requires = ["setuptools", "wheel", "setuptools_scm", "Cython"]
+
+[black.tool]
+line-length = 100
=====================================
setup.py
=====================================
@@ -1,7 +1,7 @@
import os.path
from setuptools import setup, Extension, find_packages
-from distutils.command.sdist import sdist as _sdist
-from distutils.command.build_ext import build_ext as _build_ext
+from distutils.command.sdist import sdist
+from distutils.command.build_ext import build_ext
def no_cythonize(extensions, **_ignore):
@@ -20,18 +20,13 @@ def no_cythonize(extensions, **_ignore):
extension.sources[:] = sources
-extensions = [
- Extension('dnaio._core', sources=['src/dnaio/_core.pyx']),
-]
-
-
-class BuildExt(_build_ext):
+class BuildExt(build_ext):
def run(self):
# If we encounter a PKG-INFO file, then this is likely a .tar.gz/.zip
# file retrieved from PyPI that already includes the pre-cythonized
# extension modules, and then we do not need to run cythonize().
if os.path.exists('PKG-INFO'):
- no_cythonize(extensions)
+ no_cythonize(self.extensions)
else:
# Otherwise, this is a 'developer copy' of the code, and then the
# only sensible thing is to require Cython to be installed.
@@ -40,11 +35,11 @@ class BuildExt(_build_ext):
super().run()
-class SDist(_sdist):
+class SDist(sdist):
def run(self):
# Make sure the compiled Cython files in the distribution are up-to-date
from Cython.Build import cythonize
- cythonize(extensions)
+ cythonize(self.distribution.ext_modules)
super().run()
@@ -58,7 +53,7 @@ setup(
author='Marcel Martin',
author_email='marcel.martin at scilifelab.se',
url='https://github.com/marcelm/dnaio/',
- description='Read FASTA and FASTQ files efficiently',
+ description='Read and write FASTA and FASTQ files efficiently',
long_description=long_description,
long_description_content_type='text/markdown',
license='MIT',
@@ -68,7 +63,9 @@ setup(
extras_require={
'dev': ['Cython', 'pytest'],
},
- ext_modules=extensions,
+ ext_modules=[
+ Extension('dnaio._core', sources=['src/dnaio/_core.pyx']),
+ ],
cmdclass={'build_ext': BuildExt, 'sdist': SDist},
install_requires=['xopen>=0.8.2'],
python_requires='>=3.6',
=====================================
src/dnaio/__init__.py
=====================================
@@ -5,6 +5,10 @@ Sequence I/O: Read and write FASTA and FASTQ files efficiently
__all__ = [
'open',
'Sequence',
+ 'SingleEndReader',
+ 'PairedEndReader',
+ 'SingleEndWriter',
+ 'PairedEndWriter',
'FastaReader',
'FastaWriter',
'FastqReader',
@@ -13,30 +17,35 @@ __all__ = [
'FileFormatError',
'FastaFormatError',
'FastqFormatError',
- 'InterleavedSequenceReader',
- 'InterleavedSequenceWriter',
- 'PairedSequenceReader',
+ 'InterleavedPairedEndReader',
+ 'InterleavedPairedEndWriter',
+ 'TwoFilePairedEndReader',
+ 'TwoFilePairedEndWriter',
'read_chunks',
'read_paired_chunks',
'record_names_match',
'__version__',
]
-import os
-from os import fspath, PathLike
-from contextlib import ExitStack
-import functools
-from typing import Optional, Union, BinaryIO, Tuple, Iterator
+from os import PathLike
+from typing import Optional, Union, BinaryIO
from xopen import xopen
from ._core import Sequence, record_names_match
from .readers import FastaReader, FastqReader
from .writers import FastaWriter, FastqWriter
+from .singleend import _open_single
+from .pairedend import (
+ TwoFilePairedEndReader,
+ TwoFilePairedEndWriter,
+ InterleavedPairedEndReader,
+ InterleavedPairedEndWriter,
+)
from .exceptions import UnknownFileFormat, FileFormatError, FastaFormatError, FastqFormatError
+from .interfaces import SingleEndReader, PairedEndReader, SingleEndWriter, PairedEndWriter
from .chunks import read_chunks, read_paired_chunks
from ._version import version as __version__
-from ._util import _is_path
def open(
@@ -49,46 +58,45 @@ def open(
qualities: Optional[bool] = None,
opener=xopen
) -> Union[
- FastaReader,
- FastaWriter,
- FastqReader,
- FastqWriter,
- "PairedSequenceReader",
- "PairedSequenceWriter",
- "PairedSequenceAppender",
- "InterleavedSequenceReader",
- "InterleavedSequenceWriter",
- "InterleavedSequenceAppender",
+ SingleEndReader,
+ PairedEndReader,
+ SingleEndWriter,
+ PairedEndWriter,
]:
"""
- Open sequence files in FASTA or FASTQ format for reading or writing. This is
- a factory that returns an instance of one of the ...Reader or ...Writer
- classes also defined in this module.
+ Open sequence files in FASTA or FASTQ format for reading or writing.
- file1, file2 -- Paths to regular or compressed files or file-like
+ Parameters:
+ file1:
+ file2:
+ Paths to regular or compressed files or file-like
objects (as str or as pathlib.Path). Use only file1 if data is single-end.
If sequences are paired, use also file2.
-
- mode -- Either 'r' for reading, 'w' for writing or 'a' for appending..
-
- interleaved -- If True, then file1 contains interleaved paired-end data.
+ mode:
+ Either ``'r'`` for reading, ``'w'`` for writing or ``'a'`` for appending.
+ interleaved:
+ If True, then file1 contains interleaved paired-end data.
file2 must be None in this case.
-
- fileformat -- If set to None, the file format is autodetected from the file name
- extension. Set to 'fasta' or 'fastq' to not auto-detect.
-
- qualities -- When mode is 'w' and fileformat is None, this can be set to
- True or False to specify whether the written sequences will have quality
- values. This is is used in two ways:
- * If the output format cannot be determined (unrecognized extension
+ fileformat:
+ If *None*, the file format is autodetected from the file name
+ extension. Set to ``'fasta'`` or ``'fastq'`` to not auto-detect.
+ qualities:
+ When mode is ``'w'`` and fileformat is *None*, this can be set
+ to *True* or *False* to specify whether the written sequences will have
+ quality values. This is used in two ways:
+
+ - If the output format cannot be determined (unrecognized extension
etc), no exception is raised, but fasta or fastq format is chosen
appropriately.
- * When False (no qualities available), an exception is raised when the
- auto-detected output format is FASTQ.
- opener -- A function that is used to open file1 and file2 if they are not
- already open file-like objects. By default, xopen is used, which can
+ - When False (no qualities available), an exception is raised when the
+ auto-detected output format is FASTQ.
+ opener: A function that is used to open file1 and file2 if they are not
+ already open file-like objects. By default, ``xopen`` is used, which can
also open compressed file formats.
+
+ Return:
+ An instance of one of the ...Reader or ...Writer classes
"""
if mode not in ("r", "w", "a"):
raise ValueError("Mode must be 'r', 'w' or 'a'")
@@ -99,328 +107,19 @@ def open(
if mode in "wa" and file1 == file2:
raise ValueError("The paired-end output files are identical")
if mode == "r":
- return PairedSequenceReader(file1, file2, fileformat, opener=opener)
- elif mode == "w":
- return PairedSequenceWriter(file1, file2, fileformat, qualities, opener=opener)
- else:
- return PairedSequenceAppender(file1, file2, fileformat, qualities, opener=opener)
+ return TwoFilePairedEndReader(file1, file2, fileformat=fileformat, opener=opener)
+ append = mode == "a"
+ return TwoFilePairedEndWriter(
+ file1, file2, fileformat=fileformat, qualities=qualities, opener=opener, append=append
+ )
if interleaved:
if mode == "r":
- return InterleavedSequenceReader(file1, fileformat, opener=opener)
- elif mode == "w":
- return InterleavedSequenceWriter(file1, fileformat, qualities, opener=opener)
- else:
- return InterleavedSequenceAppender(file1, fileformat, qualities, opener=opener)
+ return InterleavedPairedEndReader(file1, fileformat=fileformat, opener=opener)
+ append = mode == "a"
+ return InterleavedPairedEndWriter(
+ file1, fileformat=fileformat, qualities=qualities, opener=opener, append=append)
# The multi-file options have been dealt with, delegate rest to the
# single-file function.
return _open_single(
file1, opener=opener, fileformat=fileformat, mode=mode, qualities=qualities)
-
-
-def _detect_format_from_name(name: str) -> Optional[str]:
- """
- name -- file name
-
- Return 'fasta', 'fastq' or None if the format could not be detected.
- """
- name = name.lower()
- for ext in ('.gz', '.xz', '.bz2'):
- if name.endswith(ext):
- name = name[:-len(ext)]
- break
- name, ext = os.path.splitext(name)
- if ext in ['.fasta', '.fa', '.fna', '.csfasta', '.csfa']:
- return 'fasta'
- elif ext in ['.fastq', '.fq'] or (ext == '.txt' and name.endswith('_sequence')):
- return 'fastq'
- return None
-
-
-def _open_single(
- file_or_path: Union[str, PathLike, BinaryIO],
- opener,
- *,
- fileformat: Optional[str] = None,
- mode: str = "r",
- qualities: Optional[bool] = None,
-) -> Union[FastaReader, FastaWriter, FastqReader, FastqWriter]:
- """
- Open a single sequence file. See description of open() above.
- """
- if mode not in ("r", "w", "a"):
- raise ValueError("Mode must be 'r', 'w' or 'a'")
-
- path: Optional[str]
- if _is_path(file_or_path):
- path = fspath(file_or_path) # type: ignore
- file = opener(path, mode + "b")
- close_file = True
- else:
- if mode == 'r' and not hasattr(file_or_path, 'readinto'):
- raise ValueError(
- 'When passing in an open file-like object, it must have been opened in binary mode')
- file = file_or_path
- if hasattr(file, "name") and isinstance(file.name, str):
- path = file.name
- else:
- path = None
- close_file = False
- fastq_handler = FastqReader if mode == "r" else FastqWriter
- fasta_handler = FastaReader if mode == "r" else FastaWriter
- handlers = {
- 'fastq': functools.partial(fastq_handler, _close_file=close_file),
- 'fasta': functools.partial(fasta_handler, _close_file=close_file),
- }
- if fileformat:
- try:
- handler = handlers[fileformat.lower()]
- except KeyError:
- file.close()
- raise UnknownFileFormat(
- "File format {!r} is unknown (expected 'fasta' or 'fastq').".format(fileformat))
- return handler(file)
-
- if path is not None:
- fileformat = _detect_format_from_name(path)
- if fileformat is None and mode == 'w' and qualities is not None:
- # Format not recognized, but we know whether to use a format with or without qualities
- fileformat = 'fastq' if qualities else 'fasta'
-
- if mode == 'r' and fileformat is None:
- fileformat = _detect_format_from_content(file)
- if fileformat is None:
- name = getattr(file, "name", repr(file))
- file.close()
- raise UnknownFileFormat(
- 'Could not determine whether file "{}" is FASTA or FASTQ. The file extension was '
- 'not available or not recognized and the first character in the file is '
- 'unexpected.'.format(name))
-
- if fileformat is None:
- assert mode == 'w'
- extra = " because the output file name is not available" if path is None else ""
- file.close()
- raise UnknownFileFormat(
- "Auto-detection of the output file format (FASTA/FASTQ) failed" + extra)
-
- if fileformat == 'fastq' and mode in "wa" and qualities is False:
- file.close()
- raise ValueError(
- 'Output format cannot be FASTQ since no quality values are available.')
-
- return handlers[fileformat](file)
-
-
-def _detect_format_from_content(file: BinaryIO) -> Optional[str]:
- """
- Return 'fasta', 'fastq' or None
- """
- if file.seekable():
- first_char = file.read(1)
- if file.tell() > 0:
- file.seek(-1, 1)
- else:
- first_char = file.peek(1)[0:1] # type: ignore
- formats = {
- b'@': 'fastq',
- b'>': 'fasta',
- b'#': 'fasta', # Some FASTA variants allow comments
- b'': 'fastq', # Pretend FASTQ for empty input
- }
- return formats.get(first_char, None)
-
-
-class PairedSequenceReader:
- """
- Read paired-end reads from two files.
-
- Wraps two BinaryFileReader instances, making sure that reads are properly
- paired.
- """
- paired = True
-
- def __init__(
- self,
- file1: Union[str, PathLike, BinaryIO],
- file2: Union[str, PathLike, BinaryIO],
- fileformat: Optional[str] = None,
- opener=xopen,
- ):
- with ExitStack() as stack:
- self.reader1 = stack.enter_context(_open_single(file1, opener=opener, fileformat=fileformat))
- self.reader2 = stack.enter_context(_open_single(file2, opener=opener, fileformat=fileformat))
- self._close = stack.pop_all().close
- self.delivers_qualities = self.reader1.delivers_qualities
-
- def __repr__(self) -> str:
- return "PairedSequenceReader(file1={}, file2={})".format(self.reader1, self.reader2)
-
- def __iter__(self) -> Iterator[Tuple[Sequence, Sequence]]:
- """
- Iterate over the paired reads. Each item is a pair of Sequence objects.
- """
- # Avoid usage of zip() below since it will consume one item too many.
- it1, it2 = iter(self.reader1), iter(self.reader2)
- while True:
- try:
- r1 = next(it1)
- except StopIteration:
- # End of file 1. Make sure that file 2 is also at end.
- try:
- next(it2)
- raise FileFormatError(
- "Reads are improperly paired. There are more reads in "
- "file 2 than in file 1.", line=None) from None
- except StopIteration:
- pass
- break
- try:
- r2 = next(it2)
- except StopIteration:
- raise FileFormatError(
- "Reads are improperly paired. There are more reads in "
- "file 1 than in file 2.", line=None) from None
- if not record_names_match(r1.name, r2.name):
- raise FileFormatError(
- "Reads are improperly paired. Read name '{}' "
- "in file 1 does not match '{}' in file 2.".format(r1.name, r2.name), line=None) from None
- yield (r1, r2)
-
- def close(self) -> None:
- self._close()
-
- def __enter__(self):
- return self
-
- def __exit__(self, *exc):
- self.close()
-
-
-class InterleavedSequenceReader:
- """
- Read paired-end reads from an interleaved FASTQ file.
- """
- paired = True
-
- def __init__(
- self,
- file: Union[str, PathLike, BinaryIO],
- fileformat: Optional[str] = None,
- opener=xopen,
- ):
- reader = _open_single(file, opener=opener, fileformat=fileformat)
- assert isinstance(reader, (FastaReader, FastqReader)) # for Mypy
- self.reader = reader
- self.delivers_qualities = self.reader.delivers_qualities
-
- def __repr__(self) -> str:
- return "InterleavedSequenceReader({})".format(self.reader)
-
- def __iter__(self) -> Iterator[Tuple[Sequence, Sequence]]:
- it = iter(self.reader)
- for r1 in it:
- try:
- r2 = next(it)
- except StopIteration:
- raise FileFormatError(
- "Interleaved input file incomplete: Last record "
- "{!r} has no partner.".format(r1.name), line=None) from None
- if not record_names_match(r1.name, r2.name):
- raise FileFormatError(
- "Reads are improperly paired. Name {!r} "
- "(first) does not match {!r} (second).".format(r1.name, r2.name), line=None)
- yield (r1, r2)
-
- def close(self) -> None:
- self.reader.close()
-
- def __enter__(self):
- return self
-
- def __exit__(self, *args):
- self.close()
-
-
-class PairedSequenceWriter:
- _mode = "w"
-
- def __init__(
- self,
- file1: Union[str, PathLike, BinaryIO],
- file2: Union[str, PathLike, BinaryIO],
- fileformat: Optional[str] = "fastq",
- qualities: Optional[bool] = None,
- opener=xopen,
- ):
- with ExitStack() as stack:
- self._writer1: Union[FastaWriter, FastqWriter]
- self._writer2: Union[FastaWriter, FastqWriter]
- self._writer1 = stack.enter_context(
- _open_single(
- file1, opener=opener, fileformat=fileformat, mode=self._mode, qualities=qualities))
- self._writer2 = stack.enter_context(
- _open_single(
- file2, opener=opener, fileformat=fileformat, mode=self._mode, qualities=qualities))
- self._close = stack.pop_all().close
-
- def __repr__(self) -> str:
- return "{}({}, {})".format(self.__class__.__name__, self._writer1, self._writer2)
-
- def write(self, read1, read2) -> None:
- self._writer1.write(read1)
- self._writer2.write(read2)
-
- def close(self) -> None:
- self._close()
-
- def __enter__(self):
- # TODO do not allow this twice
- return self
-
- def __exit__(self, *args):
- self.close()
-
-
-class PairedSequenceAppender(PairedSequenceWriter):
- _mode = "a"
-
-
-class InterleavedSequenceWriter:
- """
- Write paired-end reads to an interleaved FASTA or FASTQ file
- """
- _mode = "w"
-
- def __init__(
- self,
- file: Union[str, PathLike, BinaryIO],
- fileformat: Optional[str] = "fastq",
- qualities: Optional[bool] = None,
- opener=xopen,
- ):
- writer = _open_single(
- file, opener=opener, fileformat=fileformat, mode=self._mode, qualities=qualities)
- assert isinstance(writer, (FastaWriter, FastqWriter)) # only for Mypy
- self._writer = writer
-
- def __repr__(self) -> str:
- return "{}({})".format(self.__class__.__name__, self._writer)
-
- def write(self, read1: Sequence, read2: Sequence) -> None:
- self._writer.write(read1)
- self._writer.write(read2)
-
- def close(self) -> None:
- self._writer.close()
-
- def __enter__(self):
- # TODO do not allow this twice
- return self
-
- def __exit__(self, *args):
- self.close()
-
-
-class InterleavedSequenceAppender(InterleavedSequenceWriter):
- _mode = "a"
=====================================
src/dnaio/_core.pyi
=====================================
@@ -10,6 +10,7 @@ class Sequence:
def __repr__(self) -> str: ...
def __len__(self) -> int: ...
def __richcmp__(self, other: Sequence, op: int) -> bool: ...
+ def qualities_as_bytes(self) -> bytes: ...
def fastq_bytes(self) -> bytes: ...
def fastq_bytes_two_headers(self) -> bytes: ...
=====================================
src/dnaio/_core.pyx
=====================================
@@ -1,6 +1,7 @@
# cython: language_level=3, emit_code_comments=False
-from libc.string cimport strncmp, memcmp
+from cpython.bytes cimport PyBytes_FromStringAndSize, PyBytes_AS_STRING
+from libc.string cimport strncmp, memcmp, memcpy
cimport cython
from .exceptions import FastqFormatError
@@ -9,9 +10,18 @@ from ._util import shorten
cdef class Sequence:
"""
- A record in a FASTA or FASTQ file. For FASTA, the qualities attribute
+ A sequencing read with read name/id and (optional) qualities
+
+ If qualities are available, they are stored as a string (see below).
+ A Sequence represents a
+ record containing a read in a FASTA or FASTQ file. For FASTA, the qualities attribute
is None. For FASTQ, qualities is a string and it contains the qualities
- encoded as ascii(qual+33).
+ encoded as ASCII(qual+33).
+
+ Attributes:
+ name (str): The read description
+ sequence (str):
+ qualities (str):
"""
cdef:
public str name
@@ -31,7 +41,13 @@ cdef class Sequence:
rname, len(qualities), len(sequence)))
def __getitem__(self, key):
- """slicing"""
+ """
+ Slice this Sequence. If the qualities attribute is not None, it is
+ sliced accordingly. The read name is copied unchanged.
+
+ Returns:
+ A new Sequence object with a sliced sequence.
+ """
return self.__class__(
self.name,
self.sequence[key],
@@ -45,6 +61,10 @@ cdef class Sequence:
shorten(self.name), shorten(self.sequence), qstr)
def __len__(self):
+ """
+ Returns:
+ The number of characters in this sequence
+ """
return len(self.sequence)
def __richcmp__(self, other, int op):
@@ -62,15 +82,66 @@ cdef class Sequence:
def __reduce__(self):
return (Sequence, (self.name, self.sequence, self.qualities))
+ def qualities_as_bytes(self):
+ """Return the qualities as a bytes object.
+
+ This is a faster version of qualities.encode('ascii')."""
+ return self.qualities.encode('ascii')
+
def fastq_bytes(self):
- s = ('@' + self.name + '\n' + self.sequence + '\n+\n'
- + self.qualities + '\n')
- return s.encode('ascii')
+ """Return the entire FASTQ record as bytes which can be written
+ into a file."""
+ # Convert to ASCII bytes sequences first as these have a one-to-one
+ # relation between size and number of bytes
+ # Unlike decoding, ascii is not slower than latin-1. This is because
+ # CPython performs a call to PyUnicodeCheck on both occasions. This
+ # determines the type of the Unicode object. In fact, the ascii encode
+ # is slightly faster because the check for PyASCIIObject is performed
+ # first.
+ cdef bytes name = self.name.encode('ascii')
+ cdef bytes sequence = self.sequence.encode('ascii')
+ cdef bytes qualities = self.qualities.encode('ascii')
+ cdef Py_ssize_t name_length = len(name)
+ cdef Py_ssize_t sequence_length = len(sequence)
+ cdef Py_ssize_t qualities_length = len(qualities)
+
+ # Since the code Cython generates for the statements above is guaranteed
+ # to produce bytes objects, we can call Python C-API functions that don't perform
+ # checks on the object.
+ cdef char * name_ptr = PyBytes_AS_STRING(name)
+ cdef char * sequence_ptr = PyBytes_AS_STRING(sequence)
+ cdef char * qualities_ptr = PyBytes_AS_STRING(qualities)
+
+ # Total size is name + sequence + qualities + 4 newlines + '+' and an
+ # '@' to be put in front of the name.
+ cdef Py_ssize_t total_size = name_length + sequence_length + qualities_length + 6
+
+ # This is the canonical way to create an uninitialized bytestring of given size
+ cdef bytes retval = PyBytes_FromStringAndSize(NULL, total_size)
+ cdef char * retval_ptr = PyBytes_AS_STRING(retval)
+
+ # Write the sequences into the bytestring at the correct positions.
+ cdef Py_ssize_t cursor
+ retval_ptr[0] = b"@"
+ memcpy(retval_ptr + 1, name_ptr, name_length)
+ cursor = name_length + 1
+ retval_ptr[cursor] = b"\n"; cursor += 1
+ memcpy(retval_ptr + cursor, sequence_ptr, sequence_length)
+ cursor += sequence_length
+ retval_ptr[cursor] = b"\n"; cursor += 1
+ retval_ptr[cursor] = b"+"; cursor += 1
+ retval_ptr[cursor] = b"\n"; cursor += 1
+ memcpy(retval_ptr + cursor, qualities_ptr, qualities_length)
+ cursor += qualities_length
+ retval_ptr[cursor] = b"\n"
+ return retval
def fastq_bytes_two_headers(self):
- s = ('@' + self.name + '\n' + self.sequence + '\n+'
- + self.name + '\n' + self.qualities + '\n')
- return s.encode('ascii')
+ """
+ Return this record in FASTQ format as a bytes object where the header (after the @) is
+ repeated on the third line.
+ """
+ return f"@{self.name}\n{self.sequence}\n+{self.name}\n{self.qualities}\n".encode("ascii")
# It would be nice to be able to have the first parameter be an
@@ -202,6 +273,11 @@ def fastq_iter(file, sequence_class, Py_ssize_t buffer_size):
name_length = pos - endskip - record_start - 1
name_encoded = c_buf + record_start + 1
# .decode('latin-1') is 50% faster than .decode('ascii')
+ # This is because PyUnicode_DecodeLatin1 is an alias for
+ # _PyUnicode_FromUCS1, which directly copies the bytes into a
+ # string object without any further processing. With
+ # PyUnicode_DecodeASCII, each byte is checked for whether it
+ # exceeds 127 (i.e., is not valid ASCII).
name = c_buf[record_start+1:pos-endskip].decode('latin-1')
pos += 1
=====================================
src/dnaio/_util.py
=====================================
@@ -5,6 +5,7 @@ def _is_path(obj: object) -> bool:
"""
Return whether the given object looks like a path (str, pathlib.Path or pathlib2.Path)
"""
+ # TODO
# pytest uses pathlib2.Path objects on Python 3.5 for its tmp_path fixture.
# On Python 3.6+, this function can be replaced with isinstance(obj, os.PathLike)
import sys
=====================================
src/dnaio/exceptions.py
=====================================
@@ -13,16 +13,22 @@ class FileFormatError(Exception):
self.line = line # starts at 0!
def __str__(self):
- line = 'unknown line' if self.line is None else 'line {}'.format(self.line + 1)
- return 'Error in {} file at {}: {}'.format(self.format, line, self.message)
+ line = "unknown line" if self.line is None else f"line {self.line + 1}"
+ return f"Error in {self.format} file at {line}: {self.message}"
class FastqFormatError(FileFormatError):
- format = 'FASTQ'
+ """
+ The FASTQ file is not formatted correctly
+ """
+ format = "FASTQ"
class FastaFormatError(FileFormatError):
- format = 'FASTA'
+ """
+ The FASTA file is not formatted correctly
+ """
+ format = "FASTA"
class UnknownFileFormat(Exception):
=====================================
src/dnaio/interfaces.py
=====================================
@@ -0,0 +1,28 @@
+from abc import ABC, abstractmethod
+from typing import Iterator, Tuple
+
+from dnaio import Sequence
+
+
+class SingleEndReader(ABC):
+ @abstractmethod
+ def __iter__(self) -> Iterator[Sequence]:
+ pass
+
+
+class PairedEndReader(ABC):
+ @abstractmethod
+ def __iter__(self) -> Iterator[Tuple[Sequence, Sequence]]:
+ pass
+
+
+class SingleEndWriter(ABC):
+ @abstractmethod
+ def write(self, record: Sequence) -> None:
+ pass
+
+
+class PairedEndWriter(ABC):
+ @abstractmethod
+ def write(self, record1: Sequence, record2: Sequence) -> None:
+ pass
=====================================
src/dnaio/pairedend.py
=====================================
@@ -0,0 +1,233 @@
+from contextlib import ExitStack
+from os import PathLike
+from typing import Union, BinaryIO, Optional, Iterator, Tuple
+
+from xopen import xopen
+
+from ._core import Sequence, record_names_match
+from .exceptions import FileFormatError
+from .interfaces import PairedEndReader, PairedEndWriter
+from .readers import FastaReader, FastqReader
+from .writers import FastaWriter, FastqWriter
+from .singleend import _open_single
+
+
+class TwoFilePairedEndReader(PairedEndReader):
+ """
+ Read paired-end reads from two files.
+
+ Wraps two BinaryFileReader instances, making sure that reads are properly
+ paired.
+ """
+
+ paired = True
+
+ def __init__(
+ self,
+ file1: Union[str, PathLike, BinaryIO],
+ file2: Union[str, PathLike, BinaryIO],
+ *,
+ fileformat: Optional[str] = None,
+ opener=xopen,
+ ):
+ with ExitStack() as stack:
+ self.reader1 = stack.enter_context(
+ _open_single(file1, opener=opener, fileformat=fileformat)
+ )
+ self.reader2 = stack.enter_context(
+ _open_single(file2, opener=opener, fileformat=fileformat)
+ )
+ self._close = stack.pop_all().close
+ self.delivers_qualities = self.reader1.delivers_qualities
+
+ def __repr__(self) -> str:
+ return f"{self.__class__.__name__}(file1={self.reader1}, file2={self.reader2})"
+
+ def __iter__(self) -> Iterator[Tuple[Sequence, Sequence]]:
+ """
+ Iterate over the paired reads. Each item is a pair of Sequence objects.
+ """
+ # Avoid usage of zip() below since it will consume one item too many.
+ it1, it2 = iter(self.reader1), iter(self.reader2)
+ while True:
+ try:
+ r1 = next(it1)
+ except StopIteration:
+ # End of file 1. Make sure that file 2 is also at end.
+ try:
+ next(it2)
+ raise FileFormatError(
+ "Reads are improperly paired. There are more reads in "
+ "file 2 than in file 1.",
+ line=None,
+ ) from None
+ except StopIteration:
+ pass
+ break
+ try:
+ r2 = next(it2)
+ except StopIteration:
+ raise FileFormatError(
+ "Reads are improperly paired. There are more reads in "
+ "file 1 than in file 2.",
+ line=None,
+ ) from None
+ if not record_names_match(r1.name, r2.name):
+ raise FileFormatError(
+ f"Reads are improperly paired. Read name '{r1.name}' "
+ f"in file 1 does not match '{r2.name}' in file 2.",
+ line=None,
+ ) from None
+ yield (r1, r2)
+
+ def close(self) -> None:
+ self._close()
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, *exc):
+ self.close()
+
+
+class InterleavedPairedEndReader(PairedEndReader):
+ """
+ Read paired-end reads from an interleaved FASTQ file.
+ """
+
+ paired = True
+
+ def __init__(
+ self,
+ file: Union[str, PathLike, BinaryIO],
+ *,
+ fileformat: Optional[str] = None,
+ opener=xopen,
+ ):
+ reader = _open_single(file, opener=opener, fileformat=fileformat)
+ assert isinstance(reader, (FastaReader, FastqReader)) # for Mypy
+ self.reader = reader
+ self.delivers_qualities = self.reader.delivers_qualities
+
+ def __repr__(self) -> str:
+ return f"{self.__class__.__name__}({self.reader})"
+
+ def __iter__(self) -> Iterator[Tuple[Sequence, Sequence]]:
+ it = iter(self.reader)
+ for r1 in it:
+ try:
+ r2 = next(it)
+ except StopIteration:
+ raise FileFormatError(
+ "Interleaved input file incomplete: Last record "
+ f"'{r1.name}' has no partner.",
+ line=None,
+ ) from None
+ if not record_names_match(r1.name, r2.name):
+ raise FileFormatError(
+ f"Reads are improperly paired. Name '{r1.name}' "
+ f"(first) does not match '{r2.name}' (second).",
+ line=None,
+ )
+ yield (r1, r2)
+
+ def close(self) -> None:
+ self.reader.close()
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, *args):
+ self.close()
+
+
+class TwoFilePairedEndWriter(PairedEndWriter):
+ def __init__(
+ self,
+ file1: Union[str, PathLike, BinaryIO],
+ file2: Union[str, PathLike, BinaryIO],
+ *,
+ fileformat: Optional[str] = "fastq",
+ qualities: Optional[bool] = None,
+ opener=xopen,
+ append: bool = False,
+ ):
+ mode = "a" if append else "w"
+ with ExitStack() as stack:
+ self._writer1: Union[FastaWriter, FastqWriter]
+ self._writer2: Union[FastaWriter, FastqWriter]
+ self._writer1 = stack.enter_context(
+ _open_single(
+ file1,
+ opener=opener,
+ fileformat=fileformat,
+ mode=mode,
+ qualities=qualities,
+ )
+ )
+ self._writer2 = stack.enter_context(
+ _open_single(
+ file2,
+ opener=opener,
+ fileformat=fileformat,
+ mode=mode,
+ qualities=qualities,
+ )
+ )
+ self._close = stack.pop_all().close
+
+ def __repr__(self) -> str:
+ return f"{self.__class__.__name__}({self._writer1}, {self._writer2})"
+
+ def write(self, read1, read2) -> None:
+ self._writer1.write(read1)
+ self._writer2.write(read2)
+
+ def close(self) -> None:
+ self._close()
+
+ def __enter__(self):
+ # TODO do not allow this twice
+ return self
+
+ def __exit__(self, *args):
+ self.close()
+
+
+class InterleavedPairedEndWriter(PairedEndWriter):
+ """
+ Write paired-end reads to an interleaved FASTA or FASTQ file
+ """
+
+ def __init__(
+ self,
+ file: Union[str, PathLike, BinaryIO],
+ *,
+ fileformat: Optional[str] = "fastq",
+ qualities: Optional[bool] = None,
+ opener=xopen,
+ append: bool = False,
+ ):
+ mode = "a" if append else "w"
+ writer = _open_single(
+ file, opener=opener, fileformat=fileformat, mode=mode, qualities=qualities
+ )
+ assert isinstance(writer, (FastaWriter, FastqWriter)) # only for Mypy
+ self._writer = writer
+
+ def __repr__(self) -> str:
+ return f"{self.__class__.__name__}({self._writer})"
+
+ def write(self, read1: Sequence, read2: Sequence) -> None:
+ self._writer.write(read1)
+ self._writer.write(read2)
+
+ def close(self) -> None:
+ self._writer.close()
+
+ def __enter__(self):
+ # TODO do not allow this twice
+ return self
+
+ def __exit__(self, *args):
+ self.close()
=====================================
src/dnaio/readers.py
=====================================
@@ -7,9 +7,11 @@ import io
from typing import Union, BinaryIO, Optional, Iterator, List
from xopen import xopen
+
from ._core import fastq_iter as _fastq_iter, Sequence
from ._util import shorten as _shorten
from .exceptions import FastaFormatError
+from .interfaces import SingleEndReader
class BinaryFileReader:
@@ -20,7 +22,9 @@ class BinaryFileReader:
paired: bool = False
mode: str = 'rb'
- def __init__(self, file: Union[str, BinaryIO], opener=xopen, _close_file: Optional[bool] = None):
+ def __init__(
+ self, file: Union[str, BinaryIO], *, opener=xopen, _close_file: Optional[bool] = None
+ ):
"""
The file is a path or a file-like object. In both cases, the file may
be compressed (.gz, .bz2, .xz).
@@ -35,7 +39,7 @@ class BinaryFileReader:
self._file = file
def __repr__(self) -> str:
- return "{}({!r})".format(self.__class__.__name__, getattr(self._file, "name", self._file))
+ return f"{self.__class__.__name__}('{getattr(self._file, 'name', self._file)}')"
def close(self) -> None:
if self._close_on_exit and self._file is not None:
@@ -51,7 +55,7 @@ class BinaryFileReader:
self.close()
-class FastaReader(BinaryFileReader):
+class FastaReader(BinaryFileReader, SingleEndReader):
"""
Reader for FASTA files.
"""
@@ -59,6 +63,7 @@ class FastaReader(BinaryFileReader):
def __init__(
self,
file: Union[str, BinaryIO],
+ *,
keep_linebreaks: bool = False,
sequence_class=Sequence,
opener=xopen,
@@ -98,8 +103,7 @@ class FastaReader(BinaryFileReader):
seq.append(line)
else:
raise FastaFormatError(
- "Expected '>' at beginning of record, but got {!r}."
- .format(_shorten(line)), line=i)
+ f"Expected '>' at beginning of record, but got '{_shorten(line)}'.", line=i)
if name is not None:
yield self.sequence_class(name, self._delimiter.join(seq), None)
@@ -107,7 +111,7 @@ class FastaReader(BinaryFileReader):
f.detach()
-class FastqReader(BinaryFileReader):
+class FastqReader(BinaryFileReader, SingleEndReader):
"""
Reader for FASTQ files. Does not support multi-line FASTQ files.
"""
@@ -115,6 +119,7 @@ class FastqReader(BinaryFileReader):
def __init__(
self,
file: Union[str, BinaryIO],
+ *,
sequence_class=Sequence,
buffer_size: int = 1048576,
opener=xopen,
=====================================
src/dnaio/singleend.py
=====================================
@@ -0,0 +1,126 @@
+import functools
+import os
+from typing import Optional, Union, BinaryIO
+
+from .exceptions import UnknownFileFormat
+from .readers import FastaReader, FastqReader
+from .writers import FastaWriter, FastqWriter
+from ._util import _is_path
+
+
+def _open_single(
+    file_or_path: Union[str, os.PathLike, BinaryIO],
+    opener,
+    *,
+    fileformat: Optional[str] = None,
+    mode: str = "r",
+    qualities: Optional[bool] = None,
+) -> Union[FastaReader, FastaWriter, FastqReader, FastqWriter]:
+    """
+    Open a single FASTA or FASTQ file and return a reader or writer for it.
+
+    file_or_path -- a path, which is opened with opener(path, mode + "b"), or
+        an already open file-like object, which is used as is (the returned
+        handler is then created with _close_file=False)
+    opener -- callable used to open the file when a path is given
+    fileformat -- "fasta" or "fastq" (case-insensitive). If not given, the
+        format is guessed from the file name, then from the qualities hint
+        (when writing or appending) or from the first byte of the file
+        (when reading).
+    mode -- "r", "w" or "a"
+    qualities -- only used for output whose file name does not reveal the
+        format: True selects FASTQ, False selects FASTA. FASTQ output
+        together with qualities=False is rejected.
+
+    Raises ValueError for an invalid mode, for a file-like object without
+    readinto() in read mode, and for FASTQ output when qualities is False.
+    Raises UnknownFileFormat if the format is unknown or cannot be
+    determined. The file is closed before any of the format-related
+    exceptions is raised.
+    """
+    if mode not in ("r", "w", "a"):
+        raise ValueError("Mode must be 'r', 'w' or 'a'")
+    # Appending is a form of writing: all output-only rules below apply to it as well
+    writing = mode in ("w", "a")
+
+    path: Optional[str]
+    if _is_path(file_or_path):
+        path = os.fspath(file_or_path)  # type: ignore
+        file = opener(path, mode + "b")
+        close_file = True
+    else:
+        # The presence of readinto() is used as the sign of a binary-mode file object
+        if mode == "r" and not hasattr(file_or_path, "readinto"):
+            raise ValueError(
+                "When passing in an open file-like object, it must have been opened in binary mode"
+            )
+        file = file_or_path
+        if hasattr(file, "name") and isinstance(file.name, str):
+            path = file.name
+        else:
+            path = None
+        close_file = False
+    fastq_handler = FastqReader if mode == "r" else FastqWriter
+    fasta_handler = FastaReader if mode == "r" else FastaWriter
+    handlers = {
+        "fastq": functools.partial(fastq_handler, _close_file=close_file),
+        "fasta": functools.partial(fasta_handler, _close_file=close_file),
+    }
+    if fileformat:
+        # An explicitly requested format bypasses all auto-detection
+        try:
+            handler = handlers[fileformat.lower()]
+        except KeyError:
+            file.close()
+            raise UnknownFileFormat(
+                f"File format '{fileformat}' is unknown (expected 'fasta' or 'fastq')."
+            )
+        return handler(file)
+
+    if path is not None:
+        fileformat = _detect_format_from_name(path)
+    if fileformat is None and writing and qualities is not None:
+        # Format not recognized, but we know whether to use a format with or without qualities
+        fileformat = "fastq" if qualities else "fasta"
+
+    if mode == "r" and fileformat is None:
+        fileformat = _detect_format_from_content(file)
+        if fileformat is None:
+            name = getattr(file, "name", repr(file))
+            file.close()
+            raise UnknownFileFormat(
+                f"Could not determine whether file '{name}' is FASTA or FASTQ. The file extension "
+                "is not available or not recognized, and the first character in the file is "
+                "unexpected."
+            )
+
+    if fileformat is None:
+        # Only reachable for output ("w" or "a"): reading has either found a
+        # format or raised above. No assert here, so that append mode gets the
+        # same UnknownFileFormat error as write mode and the file is closed first.
+        extra = " because the output file name is not available" if path is None else ""
+        file.close()
+        raise UnknownFileFormat(
+            "Auto-detection of the output file format (FASTA/FASTQ) failed" + extra
+        )
+
+    if fileformat == "fastq" and writing and qualities is False:
+        file.close()
+        raise ValueError(
+            "Output format cannot be FASTQ since no quality values are available."
+        )
+
+    return handlers[fileformat](file)
+
+
+def _detect_format_from_name(name: str) -> Optional[str]:
+    """
+    Guess the sequence file format from a file name.
+
+    name -- file name; it is compared case-insensitively, and one trailing
+        compression extension (.gz, .xz or .bz2) is ignored
+
+    Return 'fasta', 'fastq' or None if the format could not be detected.
+    """
+    lowered = name.lower()
+    compression = next(
+        (suffix for suffix in (".gz", ".xz", ".bz2") if lowered.endswith(suffix)), None
+    )
+    if compression is not None:
+        lowered = lowered[: -len(compression)]
+    stem, extension = os.path.splitext(lowered)
+    if extension in (".fasta", ".fa", ".fna", ".csfasta", ".csfa"):
+        return "fasta"
+    if extension in (".fastq", ".fq"):
+        return "fastq"
+    # Names such as "s_1_sequence.txt" are treated as FASTQ
+    if extension == ".txt" and stem.endswith("_sequence"):
+        return "fastq"
+    return None
+
+
+def _detect_format_from_content(file: BinaryIO) -> Optional[str]:
+    """
+    Guess the format from the first byte of the file without consuming it.
+
+    file -- binary file object. If it is seekable, one byte is read and the
+        position is restored afterwards; otherwise the file must offer peek().
+
+    Return 'fasta', 'fastq' or None
+    """
+    if file.seekable():
+        first_char = file.read(1)
+        # Step back only over what was actually read. Testing tell() > 0
+        # instead would move a file that is already at EOF (at a non-zero
+        # offset) one byte backwards although nothing was consumed.
+        if first_char:
+            file.seek(-len(first_char), 1)
+    else:
+        # peek() may return more than the requested single byte, hence the slice
+        first_char = file.peek(1)[0:1]  # type: ignore
+    formats = {
+        b"@": "fastq",
+        b">": "fasta",
+        b"#": "fasta",  # Some FASTA variants allow comments
+        b"": "fastq",  # Pretend FASTQ for empty input
+    }
+    return formats.get(first_char, None)
=====================================
src/dnaio/writers.py
=====================================
@@ -5,12 +5,14 @@ from xopen import xopen
from . import Sequence
from ._util import _is_path
+from .interfaces import SingleEndWriter
class FileWriter:
def __init__(
self,
file: Union[PathLike, str, BinaryIO],
+ *,
opener=xopen,
_close_file: Optional[bool] = None,
):
@@ -22,7 +24,7 @@ class FileWriter:
self._close_on_exit = bool(_close_file)
def __repr__(self) -> str:
- return "{}({!r})".format(self.__class__.__name__, getattr(self._file, "name", self._file))
+ return f"{self.__class__.__name__}('{getattr(self._file, 'name', self._file)}')"
def close(self) -> None:
if self._close_on_exit:
@@ -37,7 +39,7 @@ class FileWriter:
self.close()
-class FastaWriter(FileWriter):
+class FastaWriter(FileWriter, SingleEndWriter):
"""
Write FASTA-formatted sequences to a file.
"""
@@ -45,6 +47,7 @@ class FastaWriter(FileWriter):
def __init__(
self,
file: Union[PathLike, str, BinaryIO],
+ *,
line_length: Optional[int] = None,
opener=xopen,
_close_file: Optional[bool] = None,
@@ -57,7 +60,7 @@ class FastaWriter(FileWriter):
self.line_length = line_length if line_length != 0 else None
def __repr__(self) -> str:
- return "FastaWriter('{}')".format(getattr(self._file, "name", self._file))
+ return f"FastaWriter('{getattr(self._file, 'name', self._file)}')"
def write(self, name_or_record, sequence: Optional[str] = None):
"""Write an entry to the the FASTA file.
@@ -89,31 +92,41 @@ class FastaWriter(FileWriter):
self._file.write(text.encode('ascii'))
-class FastqWriter(FileWriter):
+class FastqWriter(FileWriter, SingleEndWriter):
"""
Write sequences with qualities in FASTQ format.
- FASTQ files are formatted like this:
- @read name
- SEQUENCE
- +
- QUALITIS
+ FASTQ files are formatted like this::
+
+ @read name
+ AACCGGTT
+ +
+ FF,:F,,F
"""
file_mode = 'wb'
def __init__(
self,
file: Union[PathLike, str, BinaryIO],
+ *,
two_headers: bool = False,
opener=xopen,
_close_file: Optional[bool] = None,
):
super().__init__(file, opener=opener, _close_file=_close_file)
self._two_headers = two_headers
- self.write = self._write_two_headers if self._two_headers else self._write
+ # setattr avoids a complaint from Mypy
+ setattr(self, "write", self._write_two_headers if self._two_headers else self._write)
def __repr__(self) -> str:
- return "FastqWriter('{}')".format(getattr(self._file, "name", self._file))
+ return f"FastqWriter('{getattr(self._file, 'name', self._file)}')"
+
+    def write(self, record: Sequence) -> None:
+        """
+        Dummy method to make it possible to instantiate this class.
+        The correct write method is assigned in the constructor.
+
+        Raises AssertionError if it is ever called.
+        """
+        # Raise explicitly instead of "assert False": assert statements are
+        # removed under "python -O", and this placeholder would then silently
+        # do nothing.
+        raise AssertionError("FastqWriter.write was not replaced by the constructor")
def _write(self, record: Sequence) -> None:
"""
@@ -130,5 +143,4 @@ class FastqWriter(FileWriter):
self._file.write(record.fastq_bytes_two_headers())
def writeseq(self, name: str, sequence: str, qualities: str) -> None:
- self._file.write("@{0:s}\n{1:s}\n+\n{2:s}\n".format(
- name, sequence, qualities).encode('ascii'))
+ self._file.write(f"@{name:s}\n{sequence:s}\n+\n{qualities:s}\n".encode("ascii"))
=====================================
tests/test_internal.py
=====================================
@@ -11,9 +11,9 @@ from pytest import raises, mark
import dnaio
from dnaio import (
FileFormatError, FastaFormatError, FastqFormatError,
- FastaReader, FastqReader, InterleavedSequenceReader,
- FastaWriter, FastqWriter, InterleavedSequenceWriter,
- PairedSequenceReader,
+ FastaReader, FastqReader, InterleavedPairedEndReader,
+ FastaWriter, FastqWriter, InterleavedPairedEndWriter,
+ TwoFilePairedEndReader,
)
from dnaio import record_names_match, Sequence
from dnaio.writers import FileWriter
@@ -35,6 +35,14 @@ class TestSequence:
with raises(ValueError):
Sequence(name="name", sequence="ACGT", qualities="#####")
+    def test_fastq_bytes(self):
+        """fastq_bytes() yields the four FASTQ lines with a bare '+' separator line."""
+        record = Sequence("name", "ACGT", "====")
+        expected = b"@name\nACGT\n+\n====\n"
+        assert record.fastq_bytes() == expected
+
+    def test_fastq_bytes_two_headers(self):
+        """fastq_bytes_two_headers() repeats the read name after the '+' separator."""
+        record = Sequence("name", "ACGT", "====")
+        expected = b"@name\nACGT\n+name\n====\n"
+        assert record.fastq_bytes_two_headers() == expected
+
class TestFastaReader:
def test_file(self):
@@ -329,7 +337,7 @@ class TestInterleavedReader:
Sequence('read3/2', 'TGTTATTAATATCAAGTTGG', '#HHHHHHHHHHHHHHHHHHH')
),
]
- with InterleavedSequenceReader("tests/data/interleaved.fastq") as isr:
+ with InterleavedPairedEndReader("tests/data/interleaved.fastq") as isr:
reads = list(isr)
assert reads == expected
@@ -340,14 +348,14 @@ class TestInterleavedReader:
def test_missing_partner(self):
s = BytesIO(b'@r1\nACG\n+\nHHH\n')
with raises(FileFormatError) as info:
- with InterleavedSequenceReader(s) as isr:
+ with InterleavedPairedEndReader(s) as isr:
list(isr)
assert "Interleaved input file incomplete" in info.value.message
def test_incorrectly_paired(self):
s = BytesIO(b'@r1/1\nACG\n+\nHHH\n@wrong_name\nTTT\n+\nHHH\n')
with raises(FileFormatError) as info:
- with InterleavedSequenceReader(s) as isr:
+ with InterleavedPairedEndReader(s) as isr:
list(isr)
assert "Reads are improperly paired" in info.value.message
@@ -447,7 +455,7 @@ class TestInterleavedWriter:
),
]
bio = BytesIO()
- with InterleavedSequenceWriter(bio) as writer:
+ with InterleavedPairedEndWriter(bio) as writer:
for read1, read2 in reads:
writer.write(read1, read2)
assert bio.getvalue() == (
@@ -462,7 +470,7 @@ class TestPairedSequenceReader:
def test_read(self):
s1 = BytesIO(b'@r1\nACG\n+\nHHH\n')
s2 = BytesIO(b'@r2\nGTT\n+\n858\n')
- with PairedSequenceReader(s1, s2) as psr:
+ with TwoFilePairedEndReader(s1, s2) as psr:
assert [
(Sequence("r1", "ACG", "HHH"), Sequence("r2", "GTT", "858")),
] == list(psr)
@@ -500,7 +508,7 @@ class TestPairedSequenceReader:
s2 = BytesIO(b'@r1\nACG\n+\nHHH\n')
with raises(FileFormatError) as info:
- with PairedSequenceReader(s1, s2) as psr:
+ with TwoFilePairedEndReader(s1, s2) as psr:
list(psr)
assert "There are more reads in file 2 than in file 1" in info.value.message
@@ -509,7 +517,7 @@ class TestPairedSequenceReader:
s2 = BytesIO(b'')
with raises(FileFormatError) as info:
- with PairedSequenceReader(s1, s2) as psr:
+ with TwoFilePairedEndReader(s1, s2) as psr:
list(psr)
assert "There are more reads in file 1 than in file 2" in info.value.message
@@ -517,7 +525,7 @@ class TestPairedSequenceReader:
s1 = BytesIO(b'@r1/1\nACG\n+\nHHH\n')
s2 = BytesIO(b'@wrong_name\nTTT\n+\nHHH\n')
with raises(FileFormatError) as info:
- with PairedSequenceReader(s1, s2) as psr:
+ with TwoFilePairedEndReader(s1, s2) as psr:
list(psr)
assert "Reads are improperly paired" in info.value.message
View it on GitLab: https://salsa.debian.org/med-team/python-dnaio/-/commit/00964d9fc99ded67eb43eb39c9d8f4c745c1076e
--
View it on GitLab: https://salsa.debian.org/med-team/python-dnaio/-/commit/00964d9fc99ded67eb43eb39c9d8f4c745c1076e
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20211002/deea0124/attachment-0001.htm>
More information about the debian-med-commit
mailing list