[med-svn] [Git][med-team/python-pysam][upstream] New upstream version 0.15.2+ds
Michael R. Crusoe
gitlab at salsa.debian.org
Wed Jan 16 13:20:30 GMT 2019
Michael R. Crusoe pushed to branch upstream at Debian Med / python-pysam
Commits:
804729d0 by Michael R. Crusoe at 2019-01-16T09:47:32Z
New upstream version 0.15.2+ds
- - - - -
15 changed files:
- .travis.yml
- MANIFEST.in
- NEWS
- doc/release.rst
- pysam/libcalignedsegment.pyx
- pysam/libcalignmentfile.pyx
- pysam/libcbcf.pyx
- pysam/libcfaidx.pyx
- pysam/libchtslib.pyx
- pysam/libcutils.pyx
- pysam/version.py
- tests/AlignedSegment_test.py
- tests/AlignmentFileHeader_test.py
- tests/AlignmentFilePileup_test.py
- tests/faidx_test.py
Changes:
=====================================
.travis.yml
=====================================
@@ -3,7 +3,6 @@ os:
- osx
language: c
-sudo: false
env:
matrix:
=====================================
MANIFEST.in
=====================================
@@ -50,8 +50,8 @@ include htslib/htslib.pc.in
include htslib/htslib/*.h
include htslib/cram/*.c
include htslib/cram/*.h
-include htslib/win/*.c
-include htslib/win/*.h
+include htslib/os/*.c
+include htslib/os/*.h
include cy_build.py
include pysam.py
include requirements.txt
=====================================
NEWS
=====================================
@@ -5,6 +5,17 @@ http://pysam.readthedocs.io/en/latest/release.html
Release notes
=============
+Release 0.15.2
+==============
+
+Bugfix release.
+
+* [#746] catch pileup iterator out-of-scope segfaults
+* [#747] fix faidx fetch with region
+* [#748] increase max_pos to (1<<31)-1
+* [#645] Add missing macOS stub files in `MANIFEST.in`, @SoapZA
+* [#737] Fix bug in get_aligned_pairs, @bkohrn
+
Release 0.15.1
==============
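
The #746 fix turns use of a PileupColumn outside its generating loop from a
segfault into a ValueError. A minimal sketch of the new behaviour, assuming an
indexed BAM file named ex1.bam with a contig chr1 (both names are placeholders):

    import pysam

    with pysam.AlignmentFile("ex1.bam", "rb") as bamfile:
        for column in bamfile.pileup("chr1", 100, 200):
            last = column          # keep a reference past the loop
    # The underlying htslib pileup buffer is freed once iteration ends;
    # with 0.15.2 accessing the stale column raises instead of crashing.
    try:
        _ = last.pileups
    except ValueError as err:
        print(err)
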
=====================================
doc/release.rst
=====================================
@@ -7,6 +7,17 @@ Release 0.15.1
Bugfix release.
+* [#746] catch pileup iterator out-of-scope segfaults
+* [#747] fix faidx fetch with region
+* [#748] increase max_pos to (1<<31)-1
+* [#645] Add missing macOS stub files in `MANIFEST.in`, @SoapZA
+* [#737] Fix bug in get_aligned_pairs, @bkohrn
+
+Release 0.15.1
+==============
+
+Bugfix release.
+
* [#716] raise ValueError if tid is out of range when writing
* [#697] release version using cython 0.28.5 for python 3.7
compatibility
=====================================
pysam/libcalignedsegment.pyx
=====================================
@@ -402,11 +402,18 @@ cdef inline pack_tags(tags):
# use array.tostring() to retrieve byte representation and
# save as bytes
datafmt = "2sBBI%is" % (len(value) * DATATYPE2FORMAT[typecode][1])
- args.extend([pytag[:2],
- ord("B"),
- typecode,
- len(value),
- force_bytes(value.tostring())])
+ if IS_PYTHON3:
+ args.extend([pytag[:2],
+ ord("B"),
+ typecode,
+ len(value),
+ value.tobytes()])
+ else:
+ args.extend([pytag[:2],
+ ord("B"),
+ typecode,
+ len(value),
+ force_bytes(value.tostring())])
else:
if typecode == 0:
@@ -1945,6 +1952,8 @@ cdef class AlignedSegment:
else:
for i from pos <= i < pos + l:
result.append((None, i))
+ else:
+ r_idx += l
pos += l
elif op == BAM_CHARD_CLIP:
@@ -2845,6 +2854,10 @@ cdef class PileupColumn:
# out of sync.
for x from 0 <= x < self.n_pu:
p = &(self.plp[0][x])
+ if p == NULL:
+ raise ValueError(
+ "pileup buffer out of sync - most likely use of iterator "
+ "outside loop")
if pileup_base_qual_skip(p, self.min_base_quality):
continue
pileups.append(makePileupRead(p, self.header))
@@ -2887,8 +2900,15 @@ cdef class PileupColumn:
cdef uint32_t c = 0
cdef uint32_t cnt = 0
cdef bam_pileup1_t * p = NULL
+ if self.plp == NULL or self.plp[0] == NULL:
+ raise ValueError("PileupColumn accessed after iterator finished")
+
for x from 0 <= x < self.n_pu:
p = &(self.plp[0][x])
+ if p == NULL:
+ raise ValueError(
+ "pileup buffer out of sync - most likely use of iterator "
+ "outside loop")
if pileup_base_qual_skip(p, self.min_base_quality):
continue
cnt += 1
@@ -2956,11 +2976,18 @@ cdef class PileupColumn:
cdef uint8_t rb = 0
cdef uint8_t * buf = self.buf
cdef bam_pileup1_t * p = NULL
+
+ if self.plp == NULL or self.plp[0] == NULL:
+ raise ValueError("PileupColumn accessed after iterator finished")
# todo: reference sequence to count matches/mismatches
# todo: convert assertions to exceptions
for x from 0 <= x < self.n_pu:
p = &(self.plp[0][x])
+ if p == NULL:
+ raise ValueError(
+ "pileup buffer out of sync - most likely use of iterator "
+ "outside loop")
if pileup_base_qual_skip(p, self.min_base_quality):
continue
# see samtools pileup_seq
@@ -3062,6 +3089,11 @@ cdef class PileupColumn:
result = []
for x from 0 <= x < self.n_pu:
p = &(self.plp[0][x])
+ if p == NULL:
+ raise ValueError(
+ "pileup buffer out of sync - most likely use of iterator "
+ "outside loop")
+
if p.qpos < p.b.core.l_qseq:
c = bam_get_qual(p.b)[p.qpos]
else:
@@ -3079,11 +3111,19 @@ cdef class PileupColumn:
list: a list of quality scores
"""
+ if self.plp == NULL or self.plp[0] == NULL:
+ raise ValueError("PileupColumn accessed after iterator finished")
+
cdef uint32_t x = 0
cdef bam_pileup1_t * p = NULL
result = []
for x from 0 <= x < self.n_pu:
p = &(self.plp[0][x])
+ if p == NULL:
+ raise ValueError(
+ "pileup buffer out of sync - most likely use of iterator "
+ "outside loop")
+
if pileup_base_qual_skip(p, self.min_base_quality):
continue
result.append(p.b.core.qual)
@@ -3097,12 +3137,19 @@ cdef class PileupColumn:
list: a list of read positions
"""
+ if self.plp == NULL or self.plp[0] == NULL:
+ raise ValueError("PileupColumn accessed after iterator finished")
cdef uint32_t x = 0
cdef bam_pileup1_t * p = NULL
result = []
for x from 0 <= x < self.n_pu:
p = &(self.plp[0][x])
+ if p == NULL:
+ raise ValueError(
+ "pileup buffer out of sync - most likely use of iterator "
+ "outside loop")
+
if pileup_base_qual_skip(p, self.min_base_quality):
continue
result.append(p.qpos)
@@ -3116,11 +3163,19 @@ cdef class PileupColumn:
list: a list of query names at pileup column position.
"""
+ if self.plp == NULL or self.plp[0] == NULL:
+ raise ValueError("PileupColumn accessed after iterator finished")
+
cdef uint32_t x = 0
cdef bam_pileup1_t * p = NULL
result = []
for x from 0 <= x < self.n_pu:
p = &(self.plp[0][x])
+ if p == NULL:
+ raise ValueError(
+ "pileup buffer out of sync - most likely use of iterator "
+ "outside loop")
+
if pileup_base_qual_skip(p, self.min_base_quality):
continue
result.append(charptr_to_str(pysam_bam_get_qname(p.b)))
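
The pack_tags branch above switches to array.tobytes() on Python 3, where
array.tostring() is deprecated (and removed in Python 3.9); both spellings
return the same machine-byte-order representation. A quick standalone
illustration, independent of pysam:

    import array

    values = array.array("i", [1, 2, 3])
    data = values.tobytes()        # Python 3 spelling
    # array.tostring() returned the same bytes on Python 2
    print(len(data), data.hex())
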
=====================================
pysam/libcalignmentfile.pyx
=====================================
@@ -1,4 +1,3 @@
-
# cython: embedsignature=True
# cython: profile=True
########################################################
@@ -62,6 +61,8 @@ import warnings
import array
from libc.errno cimport errno, EPIPE
from libc.string cimport strcmp, strpbrk, strerror
+from libc.stdint cimport INT32_MAX
+
from cpython cimport array as c_array
from cpython.version cimport PY_MAJOR_VERSION
@@ -94,7 +95,8 @@ IndexStats = collections.namedtuple("IndexStats",
########################################################
## global variables
# maximum genomic coordinate
-cdef int MAX_POS = 2 << 29
+# for some reason, using 'int' causes overflow
+cdef int MAX_POS = (1 << 31) - 1
# valid types for SAM headers
VALID_HEADER_TYPES = {"HD" : collections.Mapping,
@@ -1314,8 +1316,9 @@ cdef class AlignmentFile(HTSFile):
an iterator over genomic positions.
"""
- cdef int rtid, rstart, rstop, has_coord
-
+ cdef int rtid, has_coord
+ cdef int32_t rstart, rstop
+
if not self.is_open:
raise ValueError("I/O operation on closed file")
@@ -2054,7 +2057,6 @@ cdef class IteratorRowRegion(IteratorRow):
IteratorRow.__init__(self, samfile,
multiple_iterators=multiple_iterators)
-
with nogil:
self.iter = sam_itr_queryi(
self.index,
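
The MAX_POS change is what allows coordinates up to the BAM format's limit:
2 << 29 is only 2**30, while (1 << 31) - 1 is INT32_MAX, the largest value
htslib's int32_t position fields can hold. The arithmetic, for reference:

    old_max = 2 << 29          # 1073741824 == 2**30
    new_max = (1 << 31) - 1    # 2147483647 == INT32_MAX
    assert old_max == 2**30 and new_max == 2**31 - 1
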
=====================================
pysam/libcbcf.pyx
=====================================
@@ -111,7 +111,7 @@ __all__ = ['VariantFile',
## Constants
########################################################################
-cdef int MAX_POS = 2 << 29
+cdef int MAX_POS = (1 << 31) - 1
cdef tuple VALUE_TYPES = ('Flag', 'Integer', 'Float', 'String')
cdef tuple METADATA_TYPES = ('FILTER', 'INFO', 'FORMAT', 'CONTIG', 'STRUCTURED', 'GENERIC')
cdef tuple METADATA_LENGTHS = ('FIXED', 'VARIABLE', 'A', 'G', 'R')
=====================================
pysam/libcfaidx.pyx
=====================================
@@ -287,19 +287,20 @@ cdef class FastaFile:
cdef char *ref
cdef int rstart, rend
- reference, rstart, rend = parse_region(reference, start, end, region)
+ contig, rstart, rend = parse_region(reference, start, end, region)
- if reference is None:
+ if contig is None:
raise ValueError("no sequence/region supplied.")
if rstart == rend:
return ""
- ref = reference
+ contig_b = force_bytes(contig)
+ ref = contig_b
with nogil:
length = faidx_seq_len(self.fastafile, ref)
if length == -1:
- raise KeyError("sequence '%s' not present" % reference)
+ raise KeyError("sequence '%s' not present" % contig)
if rstart >= length:
return ""
@@ -315,7 +316,7 @@ cdef class FastaFile:
if errno:
raise IOError(errno, strerror(errno))
else:
- raise ValueError("failure when retrieving sequence on '%s'" % reference)
+ raise ValueError("failure when retrieving sequence on '%s'" % contig)
try:
return charptr_to_str(seq)
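
With the #747 fix, FastaFile.fetch works with a region string again: the parsed
contig is explicitly converted to bytes before the faidx lookup. Region strings
are 1-based and inclusive, while start/end arguments are 0-based and half-open,
so the two calls below should return the same sequence (a sketch assuming an
indexed FASTA, here called ex1.fa, containing a contig chr1):

    import pysam

    with pysam.FastaFile("ex1.fa") as fasta:
        assert fasta.fetch("chr1", 10, 20) == fasta.fetch(region="chr1:11-20")
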
=====================================
pysam/libchtslib.pyx
=====================================
@@ -9,10 +9,9 @@
from posix.unistd cimport dup
from libc.errno cimport errno
+from libc.stdint cimport INT32_MAX
from cpython cimport PyBytes_FromStringAndSize
-
from pysam.libchtslib cimport *
-
from pysam.libcutils cimport force_bytes, force_str, charptr_to_str, charptr_to_str_w_len
from pysam.libcutils cimport encode_filename, from_string_and_size
@@ -25,7 +24,7 @@ from pysam.libcutils cimport encode_filename, from_string_and_size
import os
import io
import re
-from warnings import warn
+from warnings import warn
########################################################################
@@ -41,7 +40,7 @@ DEF SEEK_CUR = 1
DEF SEEK_END = 2
# maximum genomic coordinate
-cdef int MAX_POS = 2 << 29
+cdef int MAX_POS = (1 << 31) - 1
cdef tuple FORMAT_CATEGORIES = ('UNKNOWN', 'ALIGNMENTS', 'VARIANTS', 'INDEX', 'REGIONS')
cdef tuple FORMATS = ('UNKNOWN', 'BINARY_FORMAT', 'TEXT_FORMAT', 'SAM', 'BAM', 'BAI', 'CRAM', 'CRAI',
@@ -630,8 +629,8 @@ cdef class HTSFile(object):
"""
cdef int rtid
- cdef long long rstart
- cdef long long rstop
+ cdef int32_t rstart
+ cdef int32_t rstop
if reference is not None:
if contig is not None:
@@ -644,7 +643,7 @@ cdef class HTSFile(object):
stop = end
if contig is None and tid is None and region is None:
- return 0, 0, 0, 0
+ return 0, 0, 0, MAX_POS
rtid = -1
rstart = 0
=====================================
pysam/libcutils.pyx
=====================================
@@ -12,6 +12,7 @@ from cpython cimport PyBytes_Check, PyUnicode_Check
from cpython cimport array as c_array
from libc.stdlib cimport calloc, free
from libc.string cimport strncpy
+from libc.stdint cimport INT32_MAX, int32_t
from libc.stdio cimport fprintf, stderr, fflush
from libc.stdio cimport stdout as c_stdout
from posix.fcntl cimport open as c_open, O_WRONLY
@@ -24,7 +25,7 @@ from libcbcftools cimport bcftools_main, bcftools_set_stdout, bcftools_set_stder
#####################################################################
# hard-coded constants
-cdef int MAX_POS = 2 << 29
+cdef int MAX_POS = (1 << 31) - 1
#################################################################
# Utility functions for quality string conversions
@@ -48,7 +49,10 @@ cpdef array_to_qualitystring(c_array.array qualities, int offset=33):
for x from 0 <= x < len(qualities):
result[x] = qualities[x] + offset
- return force_str(result.tostring())
+ if IS_PYTHON3:
+ return force_str(result.tobytes())
+ else:
+ return result.tostring()
cpdef qualities_to_qualitystring(qualities, int offset=33):
@@ -198,51 +202,58 @@ cpdef parse_region(contig=None,
for invalid or out of bounds regions.
"""
- cdef long long rstart
- cdef long long rend
+ cdef int32_t rstart
+ cdef int32_t rstop
- if contig is not None:
- reference = contig
- if stop is not None:
- end = stop
+
+ if reference is not None:
+ if contig is not None:
+ raise ValueError('contig and reference should not both be specified')
+ contig = reference
+
+ if contig is not None and region is not None:
+ raise ValueError('contig/reference and region should not both be specified')
+
+ if end is not None:
+ if stop is not None:
+ raise ValueError('stop and end should not both be specified')
+ stop = end
+
+ if contig is None and region is None:
+ raise ValueError("neither contig nor region are given")
rstart = 0
- rend = MAX_POS
- if start != None:
+ rstop = MAX_POS
+ if start is not None:
try:
rstart = start
except OverflowError:
raise ValueError('start out of range (%i)' % start)
- if end != None:
+ if stop is not None:
try:
- rend = end
+ rstop = stop
except OverflowError:
- raise ValueError('end out of range (%i)' % end)
+ raise ValueError('stop out of range (%i)' % stop)
if region:
- region = force_str(region)
if ":" in region:
contig, coord = region.split(":")
parts = coord.split("-")
rstart = int(parts[0]) - 1
if len(parts) >= 1:
- rend = int(parts[1])
+ rstop = int(parts[1])
else:
contig = region
- if not reference:
- return None, 0, 0
-
+ if rstart > rstop:
+ raise ValueError('invalid coordinates: start (%i) > stop (%i)' % (rstart, rstop))
if not 0 <= rstart < MAX_POS:
raise ValueError('start out of range (%i)' % rstart)
- if not 0 <= rend <= MAX_POS:
- raise ValueError('end out of range (%i)' % rend)
- if rstart > rend:
- raise ValueError(
- 'invalid region: start (%i) > end (%i)' % (rstart, rend))
+ if not 0 <= rstop <= MAX_POS:
+ raise ValueError('stop out of range (%i)' % rstop)
- return force_bytes(reference), rstart, rend
+ return contig, rstart, rstop
def _pysam_dispatch(collection,
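
parse_region now rejects ambiguous argument combinations (reference plus
contig, contig plus region, end plus stop) and checks start <= stop before the
range checks. At the user-facing level, mixing explicit coordinates with a
region string raises ValueError, as the new faidx test further below verifies.
A hedged sketch against an indexed ex1.fa (placeholder name):

    import pysam

    with pysam.FastaFile("ex1.fa") as fasta:
        try:
            fasta.fetch("chr1", 10, 20, region="chr1:11-20")
        except ValueError as err:
            print(err)  # contig/reference and region should not both be specified
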
=====================================
pysam/version.py
=====================================
@@ -1,5 +1,5 @@
# pysam versioning information
-__version__ = "0.15.0"
+__version__ = "0.15.2"
# TODO: upgrade number
__samtools_version__ = "1.9"
=====================================
tests/AlignedSegment_test.py
=====================================
@@ -430,6 +430,35 @@ class TestAlignedSegment(ReadTest):
(6, 27), (7, 28),
(8, 29), (9, 30)])
+ def test_equivalence_matches_only_and_with_seq(self):
+ a = self.build_read()
+ a.query_sequence = "ACGT" * 2
+ a.cigarstring = "4M1D4M"
+ a.set_tag("MD", "4^x4")
+ full = (list(zip(range(0, 4), range(20, 24), "ACGT")) +
+ [(None, 24, "x")] +
+ list(zip(range(4, 8), range(25, 29), "ACGT")))
+ self.assertEqual(
+ a.get_aligned_pairs(matches_only=False, with_seq=True), full)
+
+ self.assertEqual(
+ a.get_aligned_pairs(matches_only=True, with_seq=True),
+ [x for x in full if x[0] is not None and x[1] is not None])
+
+ a = self.build_read()
+ a.query_sequence = "ACGT" * 2
+ a.cigarstring = "4M1N4M"
+ a.set_tag("MD", "8")
+ full = (list(zip(range(0, 4), range(20, 24), "ACGT")) +
+ [(None, 24, None)] +
+ list(zip(range(4, 8), range(25, 29), "ACGT")))
+ self.assertEqual(
+ a.get_aligned_pairs(matches_only=False, with_seq=True), full)
+
+ self.assertEqual(
+ a.get_aligned_pairs(matches_only=True, with_seq=True),
+ [x for x in full if x[0] is not None and x[1] is not None])
+
def test_get_aligned_pairs_lowercase_md(self):
a = self.build_read()
a.query_sequence = "A" * 10
=====================================
tests/AlignmentFileHeader_test.py
=====================================
@@ -121,6 +121,7 @@ class TestHeaderConstruction(unittest.TestCase):
self.compare_headers(header, self.header_without_text)
self.check_name_mapping(header)
+
class TestHeaderSAM(unittest.TestCase):
"""testing header manipulation"""
@@ -287,6 +288,7 @@ class TestHeaderWriteRead(unittest.TestCase):
def check_read_write(self, flag_write, header):
fn = get_temp_filename()
+ print(fn)
with pysam.AlignmentFile(
fn,
flag_write,
@@ -299,17 +301,30 @@ class TestHeaderWriteRead(unittest.TestCase):
with pysam.AlignmentFile(fn) as inf:
read_header = inf.header
- os.unlink(fn)
+ # os.unlink(fn)
self.compare_headers(header, read_header)
+ expected_lengths = dict([(x["SN"], x["LN"]) for x in header["SQ"]])
+ self.assertEqual(expected_lengths,
+ dict(zip(read_header.references,
+ read_header.lengths)))
def test_SAM(self):
self.check_read_write("wh", self.header)
def test_BAM(self):
self.check_read_write("wb", self.header)
-
+
def test_CRAM(self):
header = copy.copy(self.header)
- # for CRAM, \t needs to be quoted:
- header['PG'][1]['CL'] = re.sub(r"\t", r"\\\\t", header['PG'][1]['CL'])
+ if "PG" in header:
+ # for CRAM, \t needs to be quoted:
+ header['PG'][1]['CL'] = re.sub(r"\t", r"\\\\t", header['PG'][1]['CL'])
self.check_read_write("wc", header)
+
+
+class TestHeaderLargeContigs(TestHeaderWriteRead):
+ """see issue 741"""
+
+ header = {'SQ': [{'LN': 2147483647, 'SN': 'chr1'},
+ {'LN': 1584, 'SN': 'chr2'}],
+ 'HD': {'VN': '1.0'}}
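
The new TestHeaderLargeContigs case (see issue #741) checks that a reference of
length 2147483647, the new MAX_POS, survives a write/read round trip. A
condensed sketch, assuming a writable path large_header.bam:

    import pysam

    header = {"HD": {"VN": "1.0"},
              "SQ": [{"SN": "chr1", "LN": 2147483647},
                     {"SN": "chr2", "LN": 1584}]}
    with pysam.AlignmentFile("large_header.bam", "wb", header=header):
        pass
    with pysam.AlignmentFile("large_header.bam", "rb") as inf:
        assert dict(zip(inf.references, inf.lengths))["chr1"] == 2147483647
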
=====================================
tests/AlignmentFilePileup_test.py
=====================================
@@ -115,7 +115,7 @@ class TestPileupReadSelection(unittest.TestCase):
fastafile=self.fastafile,
redo_baq=True)
self.check_equal(refs, iterator)
-
+
class TestPileupReadSelectionFastafile(TestPileupReadSelection):
'''test pileup functionality - backwards compatibility'''
@@ -206,10 +206,19 @@ class TestPileupObjects(unittest.TestCase):
'''test if exception is raised if pileup col is accessed after
iterator is exhausted.'''
+ max_n = 0
for pileupcol in self.samfile.pileup():
- pass
-
- self.assertRaises(ValueError, getattr, pileupcol, "pileups")
+ if max_n < pileupcol.n:
+ max_col = pileupcol
+ max_n = pileupcol.n
+
+ self.assertRaises(ValueError, getattr, max_col, "pileups")
+ self.assertRaises(ValueError, max_col.get_query_sequences)
+ self.assertRaises(ValueError, max_col.get_num_aligned)
+ self.assertRaises(ValueError, max_col.get_query_qualities)
+ self.assertRaises(ValueError, max_col.get_mapping_qualities)
+ self.assertRaises(ValueError, max_col.get_query_positions)
+ self.assertRaises(ValueError, max_col.get_query_names)
class TestIteratorColumnBAM(unittest.TestCase):
=====================================
tests/faidx_test.py
=====================================
@@ -45,6 +45,13 @@ class TestFastaFile(unittest.TestCase):
self.assertRaises(ValueError, self.file.fetch, "chr1", 20, 10)
self.assertRaises(KeyError, self.file.fetch, "chr3", 0, 100)
+ def test_fetch_with_region_and_contig_raises_exception(self):
+ self.assertRaises(ValueError, self.file.fetch, "chr1", 10, 20, "chr1:11-20")
+
+ def test_fetch_with_region_is_equivalent(self):
+ self.assertEqual(self.file.fetch("chr1", 10, 20),
+ self.file.fetch(region="chr1:11-20"))
+
def testLength(self):
self.assertEqual(len(self.file), 2)
@@ -60,7 +67,7 @@ class TestFastaFilePathIndex(unittest.TestCase):
filename = os.path.join(BAM_DATADIR, "ex1.fa")
data_suffix = ".fa"
-
+
def test_raise_exception_if_index_is_missing(self):
self.assertRaises(IOError,
pysam.FastaFile,
@@ -223,22 +230,28 @@ class TestRemoteFileFTP(unittest.TestCase):
if not check_url(self.url):
return
- with pysam.Fastafile(self.url) as f:
- self.assertEqual(
- len(f.fetch("chr1", 0, 1000)),
- 1000)
-
+ try:
+ with pysam.Fastafile(self.url) as f:
+ self.assertEqual(
+ len(f.fetch("chr1", 0, 1000)),
+ 1000)
+ except (OSError, IOError):
+ pass
+
def test_sequence_lengths_are_available(self):
if not check_url(self.url):
return
- with pysam.Fastafile(self.url) as f:
- self.assertEqual(len(f.references), 3366)
- self.assertTrue("chr1" in f.references)
- self.assertEqual(f.lengths[0],
- 248956422)
- self.assertEqual(f.get_reference_length("chr1"),
- 248956422)
+ try:
+ with pysam.Fastafile(self.url) as f:
+ self.assertEqual(len(f.references), 3366)
+ self.assertTrue("chr1" in f.references)
+ self.assertEqual(f.lengths[0],
+ 248956422)
+ self.assertEqual(f.get_reference_length("chr1"),
+ 248956422)
+ except (OSError, IOError):
+ pass
class TestFastqRecord(unittest.TestCase):
View it on GitLab: https://salsa.debian.org/med-team/python-pysam/commit/804729d07198184729ad0e30d76070d1ce9e8fce