[med-svn] [Git][med-team/python-pysam][upstream] New upstream version 0.15.4+ds
Michael R. Crusoe
gitlab at salsa.debian.org
Thu Jan 23 14:01:30 GMT 2020
Michael R. Crusoe pushed to branch upstream at Debian Med / python-pysam
Commits:
f4cfe1fb by Michael R. Crusoe at 2020-01-23T13:46:18+01:00
New upstream version 0.15.4+ds
- - - - -
14 changed files:
- .travis.yml
- doc/release.rst
- pysam/bcftools.py
- pysam/libcalignedsegment.pyx
- pysam/libcalignmentfile.pyx
- pysam/libcbcf.pyx
- pysam/libcfaidx.pyx
- pysam/libchtslib.pyx
- pysam/libcutils.pyx
- pysam/samtools.py
- pysam/version.py
- requirements.txt
- setup.py
- tests/AlignedSegment_test.py
Changes:
=====================================
.travis.yml
=====================================
@@ -4,6 +4,11 @@ os:
language: c
+stages:
+ - test
+ - name: deploy
+ if: tag IS present
+
env:
matrix:
- CONDA_PY=2.7
@@ -12,12 +17,26 @@ env:
global:
- PYSAM_LINKING_TEST=1
- TWINE_USERNAME=grepall
- - secure: 'OcwwP8/o21+SGW0UVAnnCQwllhGSCq2HJzpI9EhX3kh6J9RTkyx/+drkg45bx1Z5u8zymuAFappEYzlpzqZE886XezkjOYGVa/u+Coqr1oT/BEJHFCkCA4o26yESp7Zy8aNj/juhB7Rfa77pIDXBayqTzbALz/AURMtZapasB18='
+ - secure: bTbky3Un19NAl62lix8bMLmBv9IGNhFkRXlZH+B253nYub7jwQwPQKum3ct9ea+XHJT5//uM0B8WAF6eyugpNkPQ7+S7SEH5BJuCt30nv6qvGhSO2AffZKeHEDnfW2kqGrivn87TqeomlSBlO742CD/V0wOIUwkTT9tutd+E7FU=
-_deploy_common: &deploy_common
- if: tag IS present
+_cibw_common: &cibw_common
+ addons: {}
install:
- - python3 -m pip install cibuildwheel twine
+ - python3 -m pip install cibuildwheel>=1.1.0 twine
+ script:
+ - set -e
+ - cibuildwheel --output-dir dist
+ - twine check dist/*
+ - twine upload --skip-existing dist/*
+
+_cibw_linux: &cibw_linux
+ stage: deploy
+ os: linux
+ language: python
+ python: '3.5'
+ services:
+ - docker
+ <<: *cibw_common
matrix:
include:
@@ -25,11 +44,6 @@ matrix:
os: linux
language: python
python: '3.5'
- services:
- - docker
- env:
- - CIBW_BEFORE_BUILD="yum install -y zlib-devel bzip2-devel xz-devel && pip install -r requirements.txt"
- - CIBW_ENVIRONMENT='HTSLIB_CONFIGURE_OPTIONS="--disable-libcurl"'
addons:
apt:
packages:
@@ -37,28 +51,36 @@ matrix:
- g++
- libcurl4-openssl-dev # for libcurl support in sdist
- libssl-dev # for s3 support in sdist
- <<: *deploy_common
+ install:
+ - python3 -m pip install Cython twine
script:
- set -e
- - cibuildwheel --output-dir dist
- - python3 -m pip install Cython
- python3 setup.py build_ext --inplace
- python3 setup.py sdist
- twine check dist/*
- twine upload --skip-existing dist/*
+ - <<: *cibw_linux
+ env:
+ - CIBW_BUILD="*_x86_64"
+ - CIBW_BEFORE_BUILD="yum install -y zlib-devel bzip2-devel xz-devel && python -m pip install -r requirements.txt"
+ - CIBW_ENVIRONMENT='HTSLIB_CONFIGURE_OPTIONS="--disable-libcurl"'
+ - CIBW_REPAIR_WHEEL_COMMAND_LINUX='auditwheel repair -L . -w {dest_dir} {wheel}'
+ - CIBW_TEST_COMMAND='python -c "import pysam"'
+ - <<: *cibw_linux
+ env:
+ - CIBW_BUILD="*_i686"
+ - CIBW_BEFORE_BUILD="yum install -y zlib-devel bzip2-devel xz-devel && python -m pip install -r requirements.txt"
+ - CIBW_ENVIRONMENT='HTSLIB_CONFIGURE_OPTIONS="--disable-libcurl"'
+ - CIBW_REPAIR_WHEEL_COMMAND_LINUX='auditwheel repair -L . -w {dest_dir} {wheel}'
+ - CIBW_TEST_COMMAND='python -c "import pysam"'
- stage: deploy
os: osx
language: generic
env:
- - CIBW_BEFORE_BUILD="pip install -r requirements.txt"
+ - CIBW_BEFORE_BUILD="python -m pip install -r requirements.txt"
- CIBW_ENVIRONMENT='HTSLIB_CONFIGURE_OPTIONS="--disable-libcurl"'
- addons: {}
- <<: *deploy_common
- script:
- - set -e
- - cibuildwheel --output-dir dist
- - twine check dist/*
- - twine upload --skip-existing dist/*
+ - CIBW_TEST_COMMAND='python -c "import pysam"'
+ <<: *cibw_common
addons:
apt:
=====================================
doc/release.rst
=====================================
@@ -2,6 +2,26 @@
Release notes
=============
+Release 0.15.4
+==============
+
+Bugfix release. Principal reason for release is to update cython
+version in order to fix pip install pysam with python 3.8.
+
+* [#879] Fix add_meta function in libcbcf.pyx, so meta-information
+ lines in header added with this function have double-quoting rules
+ in accordance to rules specified in VCF4.2 and VCF4.3 specifications
+* [#863] Force arg to bytes to support non-ASCII encoding
+* [#875] Bump minimum Cython version
+* [#868] Prevent segfault on Python 2.7 AlignedSegment.compare(other=None)
+* [#867] Fix wheel building on TravisCI
+* [#863] Force arg to bytes to support non-ASCII encoding
+* [#799] disambiguate interpretation of bcf_read return code
+* [#841] Fix silent truncation of FASTQ with bad q strings
+* [#846] Prevent segmentation fault on ID, when handling malformed records
+* [#829] Run configure with the correct CC/CFLAGS/LDFLAGS env vars
+
+
Release 0.15.3
==============
=====================================
pysam/bcftools.py
=====================================
@@ -1,4 +1,4 @@
-from utils import PysamDispatcher
+from pysam.utils import PysamDispatcher
BCFTOOLS_DISPATCH = [
"index",
=====================================
pysam/libcalignedsegment.pyx
=====================================
@@ -133,7 +133,7 @@ cdef inline uint8_t strand_mark_char(uint8_t ch, bam1_t *b):
else:
return toupper(ch)
-
+
cdef inline bint pileup_base_qual_skip(bam_pileup1_t * p, uint32_t threshold):
cdef uint32_t c
if p.qpos < p.b.core.l_qseq:
@@ -143,7 +143,7 @@ cdef inline bint pileup_base_qual_skip(bam_pileup1_t * p, uint32_t threshold):
if c < threshold:
return True
return False
-
+
cdef inline char map_typecode_htslib_to_python(uint8_t s):
"""map an htslib typecode to the corresponding python typecode
@@ -223,7 +223,7 @@ cdef inline uint8_t get_tag_typecode(value, value_type=None):
"""
# 0 is unknown typecode
cdef char typecode = 0
-
+
if value_type is None:
if isinstance(value, int):
if value < 0:
@@ -341,7 +341,7 @@ cdef inline pack_tags(tags):
"""
fmts, args = ["<"], []
- # htslib typecode
+ # htslib typecode
cdef uint8_t typecode
for tag in tags:
@@ -394,7 +394,7 @@ cdef inline pack_tags(tags):
if typecode not in DATATYPE2FORMAT:
raise ValueError("invalid value type '{}' ({})".format(chr(typecode), array.typecode))
-
+
# use array.tostring() to retrieve byte representation and
# save as bytes
datafmt = "2sBBI%is" % (len(value) * DATATYPE2FORMAT[typecode][1])
@@ -416,7 +416,7 @@ cdef inline pack_tags(tags):
typecode = get_tag_typecode(value)
if typecode == 0:
raise ValueError("could not deduce typecode for value {}".format(value))
-
+
if typecode == 'a' or typecode == 'A' or typecode == 'Z' or typecode == 'H':
value = force_bytes(value)
@@ -427,7 +427,7 @@ cdef inline pack_tags(tags):
datafmt = "2sB%is" % (len(value)+1)
else:
datafmt = "2sB%s" % DATATYPE2FORMAT[typecode][0]
-
+
args.extend([pytag[:2],
typecode,
value])
@@ -924,9 +924,10 @@ cdef class AlignedSegment:
Parameters
----------
- header -- :class:`~pysam.AlignmentHeader` object to map numerical
- identifiers to chromosome names. If not given, an empty
- header is created.
+ header:
+ :class:`~pysam.AlignmentHeader` object to map numerical
+ identifiers to chromosome names. If not given, an empty
+ header is created.
'''
# Now only called when instances are created from Python
@@ -957,9 +958,9 @@ cdef class AlignedSegment:
self.cache_query_alignment_qualities = None
self.cache_query_sequence = None
self.cache_query_alignment_sequence = None
-
+
self.header = header
-
+
def __dealloc__(self):
bam_destroy1(self._delegate)
@@ -999,6 +1000,10 @@ cdef class AlignedSegment:
<,=,> to *other*
'''
+ # avoid segfault when other equals None
+ if other is None:
+ return -1
+
cdef int retval, x
cdef bam1_t *t
cdef bam1_t *o
@@ -1021,7 +1026,7 @@ cdef class AlignedSegment:
cdef uint8_t *a = <uint8_t*>&t.core
cdef uint8_t *b = <uint8_t*>&o.core
-
+
retval = memcmp(&t.core, &o.core, sizeof(bam1_core_t))
if retval:
return retval
@@ -1072,7 +1077,7 @@ cdef class AlignedSegment:
raise ValueError('sam_format failed')
else:
raise NotImplementedError("todo")
-
+
ret = force_str(line.s[:line.l])
if line.m:
@@ -1088,7 +1093,8 @@ cdef class AlignedSegment:
Parameters
----------
- sam -- :term:`SAM` formatted string
+ sam:
+ :term:`SAM` formatted string
"""
cdef AlignedSegment dest = cls.__new__(cls)
@@ -1101,7 +1107,7 @@ cdef class AlignedSegment:
line.s = _sam
sam_parse1(&line, dest.header.ptr, dest._delegate)
-
+
return dest
cpdef tostring(self, htsfile=None):
@@ -1110,13 +1116,14 @@ cdef class AlignedSegment:
Parameters
----------
- htsfile -- (deprecated) AlignmentFile object to map numerical
- identifiers to chromosome names. This parameter is present
- for backwards compatibility and ignored.
+ htsfile:
+ (deprecated) AlignmentFile object to map numerical
+ identifiers to chromosome names. This parameter is present
+ for backwards compatibility and ignored.
"""
return self.to_string()
-
+
def to_dict(self):
"""returns a json representation of the aligned segment.
@@ -1133,8 +1140,9 @@ cdef class AlignedSegment:
Parameters
----------
- sam_dict -- dictionary of alignment values, keys corresponding to output from
- :meth:`todict()`.
+ sam_dict:
+ dictionary of alignment values, keys corresponding to output from
+ :meth:`todict()`.
"""
# let htslib do the parsing
@@ -1143,7 +1151,7 @@ cdef class AlignedSegment:
"\t".join((sam_dict[x] for x in KEY_NAMES[:-1])) +
"\t" +
"\t".join(sam_dict.get(KEY_NAMES[-1], [])), header)
-
+
########################################################
## Basic attributes in order of appearance in SAM format
property query_name:
@@ -1187,7 +1195,7 @@ cdef class AlignedSegment:
src.core.l_extranul = l_extranul
src.core.l_qname = l + l_extranul
-
+
# re-acquire pointer to location in memory
# as it might have moved
p = pysam_bam_get_qname(src)
@@ -1321,7 +1329,7 @@ cdef class AlignedSegment:
return self.header.get_reference_name(self._delegate.core.mtid)
else:
raise ValueError("next_reference_name unknown if no header associated with record")
-
+
def __set__(self, reference):
cdef int mtid
if reference is None or reference == "*":
@@ -1434,7 +1442,7 @@ cdef class AlignedSegment:
nbytes_old,
nbytes_new,
p)
-
+
if retval == NULL:
raise MemoryError("could not allocate memory")
@@ -1564,7 +1572,7 @@ cdef class AlignedSegment:
# setting the unmapped flag requires recalculation of
# bin as alignment length is now implicitely 1
update_bin(self._delegate)
-
+
property mate_is_unmapped:
"""true if the mate is unmapped"""
def __get__(self):
@@ -1834,7 +1842,7 @@ cdef class AlignedSegment:
def get_forward_sequence(self):
"""return the original read sequence.
-
+
Reads mapping to the reverse strand will be reverse
complemented.
@@ -1849,7 +1857,7 @@ cdef class AlignedSegment:
def get_forward_qualities(self):
"""return base qualities of the read sequence.
-
+
Reads mapping to the reverse strand will be reversed.
"""
if self.is_reverse:
@@ -1857,7 +1865,7 @@ cdef class AlignedSegment:
else:
return self.query_qualities
-
+
def get_aligned_pairs(self, matches_only=False, with_seq=False):
"""a list of aligned read (query) and reference positions.
@@ -2075,15 +2083,11 @@ cdef class AlignedSegment:
If no cigar string is present, empty arrays will be returned.
- Parameters
- ----------
-
- Returns
- -------
-
- arrays : two arrays. The first contains the nucleotide counts within
- each cigar operation, the second contains the number of blocks for
- each cigar operation.
+ Returns:
+ arrays :
+ two arrays. The first contains the nucleotide counts within
+ each cigar operation, the second contains the number of blocks
+ for each cigar operation.
"""
@@ -2199,7 +2203,7 @@ cdef class AlignedSegment:
values = []
cdef uint32_t ncigar = len(values)
-
+
cdef bam1_t * retval = pysam_bam_update(src,
pysam_get_n_cigar(src) * 4,
ncigar * 4,
@@ -2245,7 +2249,7 @@ cdef class AlignedSegment:
This method accepts valid SAM specification value types, which
are::
-
+
A: printable char
i: signed int
f: float
@@ -2260,12 +2264,12 @@ cdef class AlignedSegment:
When deducing the type code by the python type of *value*, the
following mapping is applied::
-
+
i: python int
f: python float
Z: python str or bytes
B: python array.array, list or tuple
-
+
Note that a single character string will be output as 'Z' and
not 'A' as the former is the more general type.
"""
@@ -2406,27 +2410,24 @@ cdef class AlignedSegment:
specification) as well as additional value type 'd' as
implemented in htslib.
- Parameters
- ----------
+ Parameters:
- tag :
- data tag.
+ tag :
+ data tag.
- with_value_type : Optional[bool]
- if set to True, the return value is a tuple of (tag value, type code).
- (default False)
+ with_value_type : Optional[bool]
+ if set to True, the return value is a tuple of (tag value, type
+ code). (default False)
- Returns
- -------
+ Returns:
- A python object with the value of the `tag`. The type of the
- object depends on the data type in the data record.
+ A python object with the value of the `tag`. The type of the
+ object depends on the data type in the data record.
- Raises
- ------
+ Raises:
- KeyError
- If `tag` is not present, a KeyError is raised.
+ KeyError
+ If `tag` is not present, a KeyError is raised.
"""
cdef uint8_t * v
@@ -2799,14 +2800,14 @@ cdef class PileupColumn:
"""set the minimum base quality for this pileup column.
"""
self.min_base_quality = min_base_quality
-
+
def __len__(self):
"""return number of reads aligned to this column.
see :meth:`get_num_aligned`
"""
return self.get_num_aligned()
-
+
property reference_id:
'''the reference sequence number as defined in the header'''
def __get__(self):
@@ -2883,7 +2884,7 @@ cdef class PileupColumn:
def get_num_aligned(self):
"""return number of aligned bases at pileup column position.
-
+
This method applies a base quality filter and the number is
equal to the size of :meth:`get_query_sequences`,
:meth:`get_mapping_qualities`, etc.
@@ -2895,7 +2896,7 @@ cdef class PileupColumn:
cdef bam_pileup1_t * p = NULL
if self.plp == NULL or self.plp[0] == NULL:
raise ValueError("PileupColumn accessed after iterator finished")
-
+
for x from 0 <= x < self.n_pu:
p = &(self.plp[0][x])
if p == NULL:
@@ -2912,7 +2913,7 @@ cdef class PileupColumn:
Optionally, the bases/sequences can be annotated according to the samtools
mpileup format. This is the format description from the samtools mpileup tool::
-
+
Information on match, mismatch, indel, strand, mapping
quality and start and end of a read are all encoded at the
read base column. At this column, a dot stands for a match
@@ -2934,7 +2935,7 @@ cdef class PileupColumn:
To reproduce samtools mpileup format, set all of mark_matches,
mark_ends and add_indels to True.
-
+
Parameters
----------
@@ -2954,7 +2955,7 @@ cdef class PileupColumn:
If True, add bases for bases inserted into the reference and
'N's for base skipped from the reference. If a reference sequence
is given, add the actual bases.
-
+
Returns
-------
@@ -2968,7 +2969,7 @@ cdef class PileupColumn:
cdef uint8_t rb = 0
cdef kstring_t * buf = &self.buf
cdef bam_pileup1_t * p = NULL
-
+
if self.plp == NULL or self.plp[0] == NULL:
raise ValueError("PileupColumn accessed after iterator finished")
@@ -2987,7 +2988,7 @@ cdef class PileupColumn:
# see samtools pileup_seq
if mark_ends and p.is_head:
kputc('^', buf)
-
+
if p.b.core.qual > 93:
kputc(126, buf)
else:
@@ -3059,7 +3060,7 @@ cdef class PileupColumn:
raise ValueError(
"pileup buffer out of sync - most likely use of iterator "
"outside loop")
-
+
if p.qpos < p.b.core.l_qseq:
c = bam_get_qual(p.b)[p.qpos]
else:
@@ -3079,7 +3080,7 @@ cdef class PileupColumn:
"""
if self.plp == NULL or self.plp[0] == NULL:
raise ValueError("PileupColumn accessed after iterator finished")
-
+
cdef uint32_t x = 0
cdef bam_pileup1_t * p = NULL
result = []
@@ -3089,7 +3090,7 @@ cdef class PileupColumn:
raise ValueError(
"pileup buffer out of sync - most likely use of iterator "
"outside loop")
-
+
if pileup_base_qual_skip(p, self.min_base_quality):
continue
result.append(p.b.core.qual)
@@ -3115,7 +3116,7 @@ cdef class PileupColumn:
raise ValueError(
"pileup buffer out of sync - most likely use of iterator "
"outside loop")
-
+
if pileup_base_qual_skip(p, self.min_base_quality):
continue
result.append(p.qpos)
@@ -3131,7 +3132,7 @@ cdef class PileupColumn:
"""
if self.plp == NULL or self.plp[0] == NULL:
raise ValueError("PileupColumn accessed after iterator finished")
-
+
cdef uint32_t x = 0
cdef bam_pileup1_t * p = NULL
result = []
@@ -3141,12 +3142,12 @@ cdef class PileupColumn:
raise ValueError(
"pileup buffer out of sync - most likely use of iterator "
"outside loop")
-
+
if pileup_base_qual_skip(p, self.min_base_quality):
continue
result.append(charptr_to_str(pysam_bam_get_qname(p.b)))
return result
-
+
cdef class PileupRead:
'''Representation of a read aligned to a particular position in the
@@ -3230,7 +3231,7 @@ cdef class PileupRead:
def __get__(self):
return self._is_refskip
-
+
cpdef enum CIGAR_OPS:
CMATCH = 0
@@ -3246,30 +3247,30 @@ cpdef enum CIGAR_OPS:
cpdef enum SAM_FLAGS:
- # the read is paired in sequencing, no matter whether it is mapped in a pair
+ # the read is paired in sequencing, no matter whether it is mapped in a pair
FPAIRED = 1
- # the read is mapped in a proper pair
+ # the read is mapped in a proper pair
FPROPER_PAIR = 2
- # the read itself is unmapped; conflictive with FPROPER_PAIR
+ # the read itself is unmapped; conflictive with FPROPER_PAIR
FUNMAP = 4
- # the mate is unmapped
+ # the mate is unmapped
FMUNMAP = 8
- # the read is mapped to the reverse strand
+ # the read is mapped to the reverse strand
FREVERSE = 16
- # the mate is mapped to the reverse strand
+ # the mate is mapped to the reverse strand
FMREVERSE = 32
- # this is read1
+ # this is read1
FREAD1 = 64
- # this is read2
+ # this is read2
FREAD2 = 128
- # not primary alignment
+ # not primary alignment
FSECONDARY = 256
- # QC failure
+ # QC failure
FQCFAIL = 512
- # optical or PCR duplicate
+ # optical or PCR duplicate
FDUP = 1024
- # supplementary alignment
- FSUPPLEMENTARY = 2048
+ # supplementary alignment
+ FSUPPLEMENTARY = 2048
__all__ = [
=====================================
pysam/libcalignmentfile.pyx
=====================================
@@ -265,7 +265,7 @@ cdef class AlignmentHeader(object):
if self.ptr.target_name[x] == NULL:
raise MemoryError("could not allocate {} bytes".format(len(name) + 1, sizeof(char)))
strncpy(self.ptr.target_name[x], name, len(name))
-
+
return self
@classmethod
@@ -282,9 +282,9 @@ cdef class AlignmentHeader(object):
raise KeyError("incomplete sequence information in '%s'" % str(fields))
except ValueError:
raise ValueError("wrong sequence information in '%s'" % str(fields))
-
+
return cls._from_text_and_lengths(text, reference_names, reference_lengths)
-
+
@classmethod
def from_dict(cls, header_dict):
@@ -380,12 +380,12 @@ cdef class AlignmentHeader(object):
def _build_sequence_section(self):
"""return sequence section of header.
-
+
The sequence section is built from the list of reference names and
lengths stored in the BAM-file and not from any @SQ entries that
are part of the header's text section.
"""
-
+
cdef int x
text = []
for x in range(self.ptr.n_targets):
@@ -393,7 +393,7 @@ cdef class AlignmentHeader(object):
force_str(self.ptr.target_name[x]),
self.ptr.target_len[x]))
return "".join(text)
-
+
def to_dict(self):
"""return two-level dictionary with header information from the file.
@@ -504,7 +504,7 @@ cdef class AlignmentHeader(object):
raise KeyError("unknown reference {}".format(reference))
else:
return self.ptr.target_len[tid]
-
+
def is_valid_tid(self, int tid):
"""
return True if the numerical :term:`tid` is valid; False otherwise.
@@ -522,7 +522,7 @@ cdef class AlignmentHeader(object):
"""
reference = force_bytes(reference)
return bam_name2id(self.ptr, reference)
-
+
def __str__(self):
'''string with the full contents of the :term:`sam file` header as a
string.
@@ -561,7 +561,7 @@ cdef class AlignmentHeader(object):
def get(self, *args):
return self.to_dict().get(*args)
-
+
def __len__(self):
return self.to_dict().__len__()
@@ -887,7 +887,7 @@ cdef class AlignmentFile(HTSFile):
raise ValueError(
"either supply options `template`, `header`, `text` or both `reference_names` "
"and `reference_lengths` for writing")
-
+
if template:
# header is copied, though at the moment not strictly
# necessary as AlignmentHeader is immutable.
@@ -978,7 +978,7 @@ cdef class AlignmentFile(HTSFile):
"SAM? file does not have a valid header (mode='%s'), "
"please provide reference_names and reference_lengths")
self.header = makeAlignmentHeader(hdr)
-
+
# set filename with reference sequences
if self.is_cram and reference_filename:
creference_filename = self.reference_filename
@@ -1283,7 +1283,7 @@ cdef class AlignmentFile(HTSFile):
ignore orphans (paired reads that are not in a proper pair).
The default is to ignore orphans.
-
+
min_base_quality: int
Minimum base quality. Bases below the minimum quality will
@@ -1323,7 +1323,7 @@ cdef class AlignmentFile(HTSFile):
"""
cdef int rtid, has_coord
cdef int32_t rstart, rstop
-
+
if not self.is_open:
raise ValueError("I/O operation on closed file")
@@ -1534,7 +1534,7 @@ cdef class AlignmentFile(HTSFile):
raise ValueError("interval of size 0")
if _stop < _start:
raise ValueError("interval of size less than 0")
-
+
cdef int length = _stop - _start
cdef c_array.array int_array_template = array.array('L', [])
cdef c_array.array count_a
@@ -1582,7 +1582,7 @@ cdef class AlignmentFile(HTSFile):
# count
seq = read.seq
quality = read.query_qualities
-
+
for qpos, refpos in read.get_aligned_pairs(True):
if qpos is not None and refpos is not None and \
_start <= refpos < _stop:
@@ -1646,14 +1646,14 @@ cdef class AlignmentFile(HTSFile):
base_position = r.pos
for op, nt in r.cigartuples:
- if op in match_or_deletion:
+ if op in match_or_deletion:
base_position += nt
- elif op == BAM_CREF_SKIP:
+ elif op == BAM_CREF_SKIP:
junc_start = base_position
base_position += nt
res[(junc_start, base_position)] += 1
return res
-
+
def close(self):
'''closes the :class:`pysam.AlignmentFile`.'''
@@ -1705,16 +1705,14 @@ cdef class AlignmentFile(HTSFile):
'''
write a single :class:`pysam.AlignedSegment` to disk.
- Raises
- ------
- ValueError
- if the writing failed
-
- Returns
- -------
+ Raises:
+ ValueError
+ if the writing failed
- int : the number of bytes written. If the file is closed,
- this will be 0.
+ Returns:
+ int :
+ the number of bytes written. If the file is closed,
+ this will be 0.
'''
if not self.is_open:
return 0
@@ -1724,7 +1722,7 @@ cdef class AlignmentFile(HTSFile):
"AlignedSegment refers to reference number {} that "
"is larger than the number of references ({}) in the header".format(
read._delegate.core.tid, self.header.ptr.n_targets))
-
+
cdef int ret
with nogil:
ret = sam_write1(self.htsfile,
@@ -1800,12 +1798,12 @@ cdef class AlignmentFile(HTSFile):
"""return statistics about mapped/unmapped reads per chromosome as
they are stored in the index.
- Returns
- -------
- list : a list of records for each chromosome. Each record has the attributes 'contig',
- 'mapped', 'unmapped' and 'total'.
+ Returns:
+ list :
+ a list of records for each chromosome. Each record has the
+ attributes 'contig', 'mapped', 'unmapped' and 'total'.
"""
-
+
self.check_index()
cdef int tid
cdef uint64_t mapped, unmapped
@@ -1820,7 +1818,7 @@ cdef class AlignmentFile(HTSFile):
mapped,
unmapped,
mapped + unmapped)))
-
+
return results
###############################################################
@@ -1899,7 +1897,7 @@ cdef class AlignmentFile(HTSFile):
if self.header is None:
raise ValueError("header not available in closed files")
return self.header.get_reference_length(reference)
-
+
property nreferences:
"""int with the number of :term:`reference` sequences in the file.
This is a read-only attribute."""
@@ -1978,7 +1976,7 @@ cdef class IteratorRow:
cdef char *cfilename
cdef char *creference_filename
cdef char *cindexname = NULL
-
+
if not samfile.is_open:
raise ValueError("I/O operation on closed file")
@@ -1989,7 +1987,7 @@ cdef class IteratorRow:
# reopen the file - note that this makes the iterator
# slow and causes pileup to slow down significantly.
if multiple_iterators:
-
+
cfilename = samfile.filename
with nogil:
self.htsfile = hts_open(cfilename, 'r')
@@ -2002,7 +2000,7 @@ cdef class IteratorRow:
self.index = sam_index_load2(self.htsfile, cfilename, cindexname)
else:
self.index = NULL
-
+
# need to advance in newly opened file to position after header
# better: use seek/tell?
with nogil:
@@ -2012,7 +2010,7 @@ cdef class IteratorRow:
self.header = makeAlignmentHeader(hdr)
self.owns_samfile = True
-
+
# options specific to CRAM files
if samfile.is_cram and samfile.reference_filename:
creference_filename = samfile.reference_filename
@@ -2094,7 +2092,7 @@ cdef class IteratorRowRegion(IteratorRow):
raise IOError('truncated file')
else:
raise IOError("error while reading file {}: {}".format(self.samfile.filename, self.retval))
-
+
def __dealloc__(self):
hts_itr_destroy(self.iter)
@@ -2326,7 +2324,7 @@ cdef int __advance_nofilter(void *data, bam1_t *b):
cdef int __advance_all(void *data, bam1_t *b):
- '''only use reads for pileup passing basic filters such as
+ '''only use reads for pileup passing basic filters such as
BAM_FUNMAP, BAM_FSECONDARY, BAM_FQCFAIL, BAM_FDUP
'''
@@ -2362,7 +2360,7 @@ cdef int __advance_samtools(void * data, bam1_t * b):
continue
if d.flag_require and not (b.core.flag & d.flag_require):
continue
-
+
# reload sequence
if d.fastafile != NULL and b.core.tid != d.tid:
if d.seq != NULL:
@@ -2388,21 +2386,21 @@ cdef int __advance_samtools(void * data, bam1_t * b):
sam_prob_realn(b, d.seq, d.seq_len, 7)
else:
sam_prob_realn(b, d.seq, d.seq_len, 3)
-
+
if d.seq != NULL and d.adjust_capq_threshold > 10:
q = sam_cap_mapq(b, d.seq, d.seq_len, d.adjust_capq_threshold)
if q < 0:
continue
elif b.core.qual > q:
b.core.qual = q
-
+
if b.core.qual < d.min_mapping_quality:
continue
if d.ignore_orphans and b.core.flag & BAM_FPAIRED and not (b.core.flag & BAM_FPROPER_PAIR):
continue
-
+
break
-
+
return ret
@@ -2453,7 +2451,7 @@ cdef class IteratorColumn:
self.iterdata.compute_baq = kwargs.get("compute_baq", True)
self.iterdata.redo_baq = kwargs.get("redo_baq", False)
self.iterdata.ignore_orphans = kwargs.get("ignore_orphans", True)
-
+
self.tid = 0
self.pos = 0
self.n_plp = 0
@@ -2497,7 +2495,7 @@ cdef class IteratorColumn:
'''
return true if iterator is associated with a reference'''
return self.fastafile
-
+
cdef _setup_iterator(self,
int tid,
int start,
@@ -2523,7 +2521,7 @@ cdef class IteratorColumn:
cdef void * data[1]
data[0] = <void*>&self.iterdata
-
+
if self.stepper is None or self.stepper == "all":
with nogil:
self.pileup_iter = bam_mplp_init(1,
@@ -2550,7 +2548,7 @@ cdef class IteratorColumn:
if self.ignore_overlaps:
with nogil:
bam_mplp_init_overlaps(self.pileup_iter)
-
+
cdef reset(self, tid, start, stop):
'''reset iterator position.
@@ -2572,7 +2570,7 @@ cdef class IteratorColumn:
# &self.iterdata)
with nogil:
bam_mplp_reset(self.pileup_iter)
-
+
cdef _free_pileup_iter(self):
'''free the memory alloc'd by bam_plp_init.
@@ -2593,9 +2591,9 @@ cdef class IteratorColumn:
if self.iterdata.seq != NULL:
free(self.iterdata.seq)
self.iterdata.seq = NULL
-
+
# backwards compatibility
-
+
def hasReference(self):
return self.has_reference()
cdef char * getSequence(self):
@@ -2603,7 +2601,7 @@ cdef class IteratorColumn:
def addReference(self, FastaFile fastafile):
return self.add_reference(fastafile)
-
+
cdef class IteratorColumnRegion(IteratorColumn):
'''iterates over a region only.
'''
@@ -2630,7 +2628,7 @@ cdef class IteratorColumnRegion(IteratorColumn):
def __next__(self):
cdef int n
-
+
while 1:
n = self.cnext()
if n < 0:
@@ -2823,7 +2821,7 @@ cdef class IndexedReads:
cdef uint64_t pos
cdef bam_hdr_t * hdr = self.header.ptr
-
+
while ret > 0:
with nogil:
pos = bgzf_tell(hts_get_bgzfp(self.htsfile))
=====================================
pysam/libcbcf.pyx
=====================================
@@ -418,7 +418,7 @@ cdef bcf_copy_expand_array(void *src_data, int src_type, size_t src_values,
"""copy data from src to dest where the size of the elements (src_type/dst_type) differ
as well as the number of elements (src_values/dst_values).
"""
-
+
cdef char *src_datac
cdef char *dst_datac
cdef int8_t *src_datai8
@@ -817,7 +817,7 @@ cdef bcf_format_set_value(VariantRecordSample sample, key, value):
if key == 'phased':
sample.phased = bool(value)
return
-
+
cdef bcf_hdr_t *hdr = sample.record.header.ptr
cdef bcf1_t *r = sample.record.ptr
cdef int fmt_id
@@ -863,7 +863,7 @@ cdef bcf_format_set_value(VariantRecordSample sample, key, value):
&value_count, &scalar, &realloc)
vlen = value_count < 0
value_count = len(values)
-
+
# If we can, write updated values to existing allocated storage.
if fmt and not realloc:
r.d.indiv_dirty = 1
@@ -2124,6 +2124,7 @@ cdef class VariantHeader(object):
if self.ptr.dirty:
bcf_hdr_sync(self.ptr)
+
def add_meta(self, key, value=None, items=None):
"""Add metadata to this header"""
if not ((value is not None) ^ (items is not None)):
@@ -2140,11 +2141,16 @@ cdef class VariantHeader(object):
hrec.value = strdup(force_bytes(value))
else:
for key, value in items:
+
+ quoted = True
+ if key in set(("ID", "Number", "Type")):
+ quoted = False
+
key = force_bytes(key)
bcf_hrec_add_key(hrec, key, <int>len(key))
value = force_bytes(str(value))
- quoted = strpbrk(value, ' ;,"\t<>') != NULL
+
bcf_hrec_set_val(hrec, hrec.nkeys-1, value, <int>len(value), quoted)
except:
bcf_hrec_destroy(hrec)
@@ -3134,6 +3140,8 @@ cdef class VariantRecord(object):
raise ValueError('Error unpacking VariantRecord')
# causes a memory leak https://github.com/pysam-developers/pysam/issues/773
# return bcf_str_cache_get_charptr(r.d.id) if r.d.id != b'.' else None
+ if (r.d.m_id == 0):
+ raise ValueError('Error extracing ID')
return charptr_to_str(r.d.id) if r.d.id != b'.' else None
@id.setter
@@ -4080,6 +4088,7 @@ cdef class VariantFile(HTSFile):
def __next__(self):
cdef int ret
+ cdef int errcode
cdef bcf1_t *record = bcf_init1()
if not record:
@@ -4093,7 +4102,10 @@ cdef class VariantFile(HTSFile):
ret = bcf_read1(self.htsfile, self.header.ptr, record)
if ret < 0:
+ errcode = record.errcode
bcf_destroy1(record)
+ if errcode:
+ raise IOError('unable to parse next record')
if ret == -1:
raise StopIteration
elif ret == -2:
@@ -4445,3 +4457,4 @@ cdef class VariantFile(HTSFile):
# potentially unnecessary optimization that also sets max_unpack
if not include_samples:
self.drop_samples = True
+
=====================================
pysam/libcfaidx.pyx
=====================================
@@ -646,8 +646,14 @@ cdef class FastxFile:
if self.persist:
return FastxRecord(proxy=makeFastqProxy(self.entry))
return makeFastqProxy(self.entry)
- else:
+ elif (l == -1):
raise StopIteration
+ elif (l == -2):
+ raise ValueError('truncated quality string in {0}'
+ .format(self._filename))
+ else:
+ raise ValueError('unknown problem parsing {0}'
+ .format(self._filename))
# Compatibility Layer for pysam 0.8.1
cdef class FastqFile(FastxFile):
=====================================
pysam/libchtslib.pyx
=====================================
@@ -617,9 +617,10 @@ cdef class HTSFile(object):
Returns
-------
- tuple : a tuple of `flag`, :term:`tid`, `start` and
- `stop`. The flag indicates whether no coordinates were
- supplied and the genomic region is the complete genomic space.
+ tuple :
+ a tuple of `flag`, :term:`tid`, `start` and
+ `stop`. The flag indicates whether no coordinates were
+ supplied and the genomic region is the complete genomic space.
Raises
------
=====================================
pysam/libcutils.pyx
=====================================
@@ -284,7 +284,7 @@ def _pysam_dispatch(collection,
if skip_next:
skip_next = False
continue
- if arg in SIMPLE_FLAGS or (len(arg) > 2 and arg.startswith('-@')):
+ if arg in SIMPLE_FLAGS or (len(arg) > 2 and force_bytes(arg).startswith(b'-@')):
continue
if arg in ARGUMENTS:
skip_next = True
=====================================
pysam/samtools.py
=====================================
@@ -1,4 +1,4 @@
-from utils import PysamDispatcher
+from pysam.utils import PysamDispatcher
# samtools command line options to export in python
#
=====================================
pysam/version.py
=====================================
@@ -1,5 +1,5 @@
# pysam versioning information
-__version__ = "0.15.3"
+__version__ = "0.15.4"
# TODO: upgrade number
__samtools_version__ = "1.9"
=====================================
requirements.txt
=====================================
@@ -1 +1 @@
-cython>=0.24.1
+cython>=0.29.12
=====================================
setup.py
=====================================
@@ -29,7 +29,7 @@ import subprocess
import sys
import sysconfig
from contextlib import contextmanager
-from setuptools import Extension, setup
+from setuptools import setup
from cy_build import CyExtension as Extension, cy_build_ext as build_ext
try:
import cython
@@ -443,7 +443,7 @@ metadata = {
'classifiers': [_f for _f in classifiers.split("\n") if _f],
'url': "https://github.com/pysam-developers/pysam",
'packages': package_list,
- 'requires': ['cython (>=0.21)'],
+ 'requires': ['cython (>=0.29.12)'],
'ext_modules': [Extension(**opts) for opts in modules],
'cmdclass': cmdclass,
'package_dir': package_dirs,
=====================================
tests/AlignedSegment_test.py
=====================================
@@ -78,6 +78,12 @@ class TestAlignedSegment(ReadTest):
def testCompare(self):
'''check comparison functions.'''
a = self.build_read()
+ b = None
+
+ self.assertFalse(a is b)
+ self.assertFalse(a == b)
+ self.assertEqual(-1, a.compare(b))
+
b = self.build_read()
self.assertEqual(0, a.compare(b))
View it on GitLab: https://salsa.debian.org/med-team/python-pysam/commit/f4cfe1fb151bb1d64dda10ff78b974e968826c5e
--
View it on GitLab: https://salsa.debian.org/med-team/python-pysam/commit/f4cfe1fb151bb1d64dda10ff78b974e968826c5e
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20200123/0984e58c/attachment-0001.html>
More information about the debian-med-commit mailing list