[med-svn] [Git][med-team/python-dnaio][master] 3 commits: New upstream version 0.4.2
Steffen Möller
gitlab at salsa.debian.org
Thu Apr 30 21:53:10 BST 2020
Steffen Möller pushed to branch master at Debian Med / python-dnaio
Commits:
850ea9ad by Steffen Moeller at 2020-04-30T22:47:56+02:00
New upstream version 0.4.2
- - - - -
d485533b by Steffen Moeller at 2020-04-30T22:47:56+02:00
Update upstream source from tag 'upstream/0.4.2'
Update to upstream version '0.4.2'
with Debian dir 3b33c121e9a8a488b359b1efd16d18bbbea211ab
- - - - -
351a8370 by Steffen Moeller at 2020-04-30T22:50:19+02:00
Preparing for Upload
- - - - -
6 changed files:
- debian/changelog
- debian/control
- src/dnaio/__init__.py
- src/dnaio/_core.pyx
- src/dnaio/writers.py
- tests/test_internal.py
Changes:
=====================================
debian/changelog
=====================================
@@ -1,3 +1,11 @@
+python-dnaio (0.4.2-1) unstable; urgency=medium
+
+ * Team upload.
+ * New upstream version.
+ * Added Rules-Requires-Root: no flag
+
+ -- Steffen Moeller <moeller at debian.org> Thu, 30 Apr 2020 22:48:33 +0200
+
python-dnaio (0.4.1-2) unstable; urgency=medium
* Team upload.
=====================================
debian/control
=====================================
@@ -15,6 +15,7 @@ Standards-Version: 4.5.0
Vcs-Browser: https://salsa.debian.org/med-team/python-dnaio
Vcs-Git: https://salsa.debian.org/med-team/python-dnaio.git
Homepage: https://github.com/marcelm/dnaio
+Rules-Requires-Root: no
Package: python3-dnaio
Architecture: any
=====================================
src/dnaio/__init__.py
=====================================
@@ -28,7 +28,7 @@ import pathlib
from xopen import xopen
-from ._core import Sequence
+from ._core import Sequence, record_names_match as _record_names_match
from .readers import FastaReader, FastqReader
from .writers import FastaWriter, FastqWriter
from .exceptions import UnknownFileFormat, FileFormatError, FastaFormatError, FastqFormatError
@@ -212,21 +212,6 @@ def _detect_format_from_content(file):
return formats.get(first_char, None)
-def _sequence_names_match(r1, r2):
- """
- Check whether the sequence records r1 and r2 have identical names, ignoring a
- suffix of '1' or '2'. Some old paired-end reads have names that end in '/1'
- and '/2'. Also, the fastq-dump tool (used for converting SRA files to FASTQ)
- appends a .1 and .2 to paired-end reads if option -I is used.
- """
- name1 = r1.name.split(None, 1)[0]
- name2 = r2.name.split(None, 1)[0]
- if name1[-1:] in '12' and name2[-1:] in '12':
- name1 = name1[:-1]
- name2 = name2[:-1]
- return name1 == name2
-
-
class PairedSequenceReader:
"""
Read paired-end reads from two files.
@@ -268,7 +253,7 @@ class PairedSequenceReader:
raise FileFormatError(
"Reads are improperly paired. There are more reads in "
"file 1 than in file 2.", line=None) from None
- if not _sequence_names_match(r1, r2):
+ if not _record_names_match(r1.name, r2.name):
raise FileFormatError(
"Reads are improperly paired. Read name '{}' "
"in file 1 does not match '{}' in file 2.".format(r1.name, r2.name), line=None) from None
@@ -303,7 +288,7 @@ class InterleavedSequenceReader:
raise FileFormatError(
"Interleaved input file incomplete: Last record "
"{!r} has no partner.".format(r1.name), line=None) from None
- if not _sequence_names_match(r1, r2):
+ if not _record_names_match(r1.name, r2.name):
raise FileFormatError(
"Reads are improperly paired. Name {!r} "
"(first) does not match {!r} (second).".format(r1.name, r2.name), line=None)
=====================================
src/dnaio/_core.pyx
=====================================
@@ -1,6 +1,6 @@
# cython: language_level=3, emit_code_comments=False
-from libc.string cimport strncmp
+from libc.string cimport strncmp, memcmp
cimport cython
from .exceptions import FastqFormatError
@@ -62,6 +62,16 @@ cdef class Sequence:
def __reduce__(self):
return (Sequence, (self.name, self.sequence, self.qualities))
+ def fastq_bytes(self):
+ s = ('@' + self.name + '\n' + self.sequence + '\n+\n'
+ + self.qualities + '\n')
+ return s.encode('ascii')
+
+ def fastq_bytes_two_headers(self):
+ s = ('@' + self.name + '\n' + self.sequence + '\n+'
+ + self.name + '\n' + self.qualities + '\n')
+ return s.encode('ascii')
+
# It would be nice to be able to have the first parameter be an
# unsigned char[:] (memory view), but this fails with a BufferError
@@ -282,3 +292,21 @@ def fastq_iter(file, sequence_class, Py_ssize_t buffer_size):
'Premature end of file encountered. The incomplete final record was: '
'{!r}'.format(shorten(buf[record_start:pos].decode('latin-1'), 500)),
line=n_records * 4 + lines)
+
+
+def record_names_match(header1: str, header2: str):
+ """
+ Check whether the sequence record ids id1 and id2 are compatible, ignoring a
+ suffix of '1' or '2'. Some old paired-end reads have names that end in '/1'
+ and '/2'. Also, the fastq-dump tool (used for converting SRA files to FASTQ)
+ appends a .1 and .2 to paired-end reads if option -I is used.
+ """
+ # TODO optimize this a bit more
+ cdef:
+ str name1, name2
+ name1 = header1.split()[0]
+ name2 = header2.split()[0]
+ if name1[-1:] in '12' and name2[-1:] in '12':
+ name1 = name1[:-1]
+ name2 = name2[:-1]
+ return name1 == name2
=====================================
src/dnaio/writers.py
=====================================
@@ -81,17 +81,21 @@ class FastqWriter(FileWriter):
def __init__(self, file, two_headers=False, opener=xopen, _close_file=None):
super().__init__(file, opener=opener, _close_file=_close_file)
self._two_headers = two_headers
+ self.write = self._write_two_headers if self._two_headers else self._write
- def write(self, record):
+ def _write(self, record):
"""
Write a Sequence record to the FASTQ file.
- The record object must have attributes .name, .sequence and .qualities.
"""
- name2 = record.name if self._two_headers else ''
- s = ('@' + record.name + '\n' + record.sequence + '\n+'
- + name2 + '\n' + record.qualities + '\n')
- self._file.write(s.encode('ascii'))
+ self._file.write(record.fastq_bytes())
+
+ def _write_two_headers(self, record):
+ """
+ Write a Sequence record to the FASTQ file, repeating the header
+ in the third line after the "+" .
+ """
+ self._file.write(record.fastq_bytes_two_headers())
def writeseq(self, name, sequence, qualities):
self._file.write("@{0:s}\n{1:s}\n+\n{2:s}\n".format(
=====================================
tests/test_internal.py
=====================================
@@ -14,7 +14,7 @@ from dnaio import (
FastaReader, FastqReader, InterleavedSequenceReader,
FastaWriter, FastqWriter, InterleavedSequenceWriter,
PairedSequenceReader)
-from dnaio import _sequence_names_match, Sequence
+from dnaio import _record_names_match, Sequence
# files tests/data/simple.fast{q,a}
@@ -462,12 +462,8 @@ class TestPairedSequenceReader:
(Sequence("r1", "ACG", "HHH"), Sequence("r2", "GTT", "858")),
] == list(psr)
- def test_sequence_names_match(self):
- def match(name1, name2):
- seq1 = Sequence(name1, 'ACGT')
- seq2 = Sequence(name2, 'AACC')
- return _sequence_names_match(seq1, seq2)
-
+ def test_record_names_match(self):
+ match = _record_names_match
assert match('abc', 'abc')
assert match('abc/1', 'abc/2')
assert match('abc.1', 'abc.2')
View it on GitLab: https://salsa.debian.org/med-team/python-dnaio/-/compare/d8a5678d98dc246172a3d5531f209568436e7e92...351a8370a891d0a21add3d0786644731b5d0e6ef
--
View it on GitLab: https://salsa.debian.org/med-team/python-dnaio/-/compare/d8a5678d98dc246172a3d5531f209568436e7e92...351a8370a891d0a21add3d0786644731b5d0e6ef
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20200430/7881b908/attachment-0001.html>
More information about the debian-med-commit mailing list