[med-svn] [Git][med-team/cyvcf2][master] 3 commits: New upstream version 0.20.1

Nilesh Patra gitlab at salsa.debian.org
Thu Jul 9 18:31:15 BST 2020



Nilesh Patra pushed to branch master at Debian Med / cyvcf2


Commits:
09dacf19 by Nilesh Patra at 2020-07-09T22:36:55+05:30
New upstream version 0.20.1
- - - - -
56efdcee by Nilesh Patra at 2020-07-09T22:36:56+05:30
Update upstream source from tag 'upstream/0.20.1'

Update to upstream version '0.20.1'
with Debian dir d7c54541c5b2963605c95f7eea7b6973097541e1
- - - - -
b3994365 by Nilesh Patra at 2020-07-09T23:00:36+05:30
Refresh patch and cythonize modules

- - - - -


11 changed files:

- .gitignore
- .travis.yml
- README.md
- cyvcf2/__init__.py
- cyvcf2/cyvcf2.pxd
- cyvcf2/cyvcf2.pyx
- cyvcf2/tests/test_reader.py
- debian/patches/add_htslib_link_dependency.patch
- docs/source/conf.py
- docs/source/index.rst
- setup.py


Changes:

=====================================
.gitignore
=====================================
@@ -18,3 +18,4 @@ _static
 _templates
 setup-requires/*
 .cache/v/cache/lastfailed
+.idea


=====================================
.travis.yml
=====================================
@@ -15,7 +15,7 @@ install:
   - ./configure --enable-s3 --disable-lzma --disable-bz2
   - make
   - cd ..
-  - python setup.py install
+  - CYTHONIZE=1 python setup.py install
 
 script:
   - python setup.py test


=====================================
README.md
=====================================
@@ -1,6 +1,8 @@
 cyvcf2
 ======
 
+Note: cyvcf2 versions < 0.20.0 require htslib < 1.10. cyvcf2 versions >= 0.20.0 require htslib >= 1.10
+
 <!-- ghp-import -p docs/build/html/ -->
 [![Docs](https://img.shields.io/badge/docs-latest-blue.svg)](http://brentp.github.io/cyvcf2/)
 
@@ -9,7 +11,7 @@ If you use cyvcf2, please cite the [paper](https://academic.oup.com/bioinformati
 
 Fast python **(2 and 3)** parsing of VCF and BCF including region-queries.
 
-[![Build Status](https://travis-ci.org/brentp/cyvcf2.svg?branch=master)](https://travis-ci.org/brentp/cyvcf2)
+[![Build Status](https://travis-ci.com/brentp/cyvcf2.svg?branch=master)](https://travis-ci.com/brentp/cyvcf2)
 
 cyvcf2 is a cython wrapper around [htslib](https://github.com/samtools/htslib) built for fast parsing of [Variant Call Format](https://en.m.wikipedia.org/wiki/Variant_Call_Format) (VCF) files.
 
@@ -68,7 +70,7 @@ for v in vcf('11:435345-556565'):
 Installation
 ============
 
-## pip
+## pip (assuming you have htslib < 1.10 installed)
 ```
 pip install cyvcf2
 ```
@@ -84,7 +86,7 @@ autoconf
 make
 
 cd ..
-pip install -e .
+CYTHONIZE=1 pip install -e .
 ```
 
 On **OSX**, using brew, you may have to set the following as indicated by the brew install:


=====================================
cyvcf2/__init__.py
=====================================
@@ -2,4 +2,4 @@ from .cyvcf2 import (VCF, Variant, Writer, r_ as r_unphased, par_relatedness,
                      par_het)
 Reader = VCFReader = VCF
 
-__version__ = "0.11.6"
+__version__ = "0.20.1"


=====================================
cyvcf2/cyvcf2.pxd
=====================================
@@ -1,4 +1,4 @@
-from libc.stdint cimport int32_t, uint32_t, int8_t, int16_t, uint8_t
+from libc.stdint cimport int64_t, int32_t, uint32_t, int8_t, int16_t, uint8_t
 import numpy as np
 cimport numpy as np
 np.import_array()
@@ -23,7 +23,7 @@ cdef extern from "htslib/kstring.h":
         size_t l, m;
         char *s;
 
-    inline char *ks_release(kstring_t *s)
+    char *ks_release(kstring_t *s)
 
 cdef extern from "htslib/hfile.h":
     ctypedef struct hFILE:
@@ -79,7 +79,7 @@ cdef extern from "htslib/tbx.h":
 
     tbx_t *tbx_index_load(const char *fn);
     tbx_t *tbx_index_load2(const char *fn, const char *fnidx);
-    hts_itr_t *tbx_itr_queryi(tbx_t *tbx, int tid, int beg, int end)
+    hts_itr_t *tbx_itr_queryi(tbx_t *tbx, int tid, int64_t beg, int64_t end)
     hts_itr_t *tbx_itr_querys(tbx_t *tbx, char *reg) nogil
     int tbx_itr_next(htsFile *fp, tbx_t *tbx, hts_itr_t *iter, void *data) nogil;
     void tbx_destroy(tbx_t *tbx);
@@ -109,8 +109,6 @@ cdef extern from "htslib/vcf.h":
     const int BCF_BT_FLOAT  = 5
     const int BCF_BT_CHAR   = 7
 
-    const int BCF_HT_FLAG = 0
-
     const int bcf_str_missing = 0x07
     const int bcf_str_vector_end = 0
 
@@ -139,13 +137,14 @@ cdef extern from "htslib/vcf.h":
 
     ctypedef struct bcf_info_t:
         int key;        # key: numeric tag id, the corresponding string is bcf_hdr_t::id[BCF_DT_ID][$key].key
-        int type, len;  # type: one of BCF_BT_* types; len: vector length, 1 for scalars
+        int type;  # type: one of BCF_BT_* types
         #} v1; # only set if $len==1; for easier access
         uv1 v1
         uint8_t *vptr;          # pointer to data array in bcf1_t->shared.s, excluding the size+type and tag id bytes
         uint32_t vptr_len;      # length of the vptr block or, when set, of the vptr_mod block, excluding offset
         uint32_t vptr_off;
         uint32_t vptr_free;   # vptr offset, i.e., the size of the INFO key plus size+type bytes
+        int len;  # len: vector length, 1 for scalars
                # indicates that vptr-vptr_off must be freed; set only when modified and the new
 
 
@@ -164,12 +163,12 @@ cdef extern from "htslib/vcf.h":
         int indiv_dirty;    # if set, indiv.s must be recreated on BCF output
 
     ctypedef struct bcf1_t:
+        int64_t pos;  #// POS
+        int64_t rlen; #// length of REF
         int32_t rid;  #// CHROM
-        int32_t pos;  #// POS
-        int32_t rlen; #// length of REF
         float qual;   #// QUAL
         uint32_t n_info, n_allele;
-        uint8_t n_fmt #//:8 #//, n_sample:24;
+        uint32_t n_fmt #//:8 #//, n_sample:24;
         #kstring_t shared, indiv;
         bcf_dec_t d; #// lazy evaluation: $d is not generated by bcf_read(), but by explicitly calling bcf_unpack()
         int max_unpack;        # // Set to BCF_UN_STR, BCF_UN_FLT, or BCF_UN_INFO to boost performance of vcf_parse when some of the fields wont be needed
@@ -215,6 +214,7 @@ cdef extern from "htslib/vcf.h":
         int nsamples_ori;        # for bcf_hdr_set_samples()
         uint8_t *keep_samples;
         kstring_t mem;
+        int32_t m[3];
 
 
     void bcf_float_set(float *ptr, uint32_t value)
@@ -235,6 +235,8 @@ cdef extern from "htslib/vcf.h":
     int bcf_hdr_nsamples(const bcf_hdr_t *hdr);
     void bcf_hdr_destroy(const bcf_hdr_t *hdr)
     char *bcf_hdr_fmt_text(const bcf_hdr_t *hdr, int is_bcf, int *len);
+    int bcf_hdr_format(const bcf_hdr_t *hdr, int is_bcf, kstring_t *str);
+
     bcf_hdr_t *bcf_hdr_init(const char *mode);
     int bcf_hdr_parse(bcf_hdr_t *hdr, char *htxt);
 
@@ -280,7 +282,7 @@ cdef extern from "htslib/vcf.h":
 
 
     ## genotypes
-    inline void bcf_gt2alleles(int igt, int *a, int *b);
+    void bcf_gt2alleles(int igt, int *a, int *b);
     int bcf_update_genotypes(const bcf_hdr_t *hdr, bcf1_t *line, const void *values, int n);
     # idx is 0 for ref, 1... for alts...
     int bcf_gt_phased(int idx);


=====================================
cyvcf2/cyvcf2.pyx
=====================================
@@ -11,7 +11,10 @@ import numpy as np
 from array import array
 import math
 import ctypes
-
+try:
+  from pathlib import Path
+except ImportError:
+  from pathlib2 import Path  # python 2 backport
 
 from libc cimport stdlib
 cimport numpy as np
@@ -130,7 +133,87 @@ cdef set_constants(VCF v):
         v.HOM_ALT = 3
 
 
-cdef class VCF:
+
+cdef class HTSFile:
+
+    cdef htsFile *hts
+    cdef bytes fname
+    cdef bytes mode
+    cdef bint from_path
+
+    cdef _open_htsfile(self, fname, mode):
+        """Opens an htsfile for reading or writing.
+
+        Parameters
+        ----------
+        fname: str, Path, int, or file-like object
+            filename (str or Path), file descriptor (int), or file-like object (has fileno method).
+        mode: str
+            the mode to pass to hts_open.
+        """
+        cdef hFILE *hf
+        self.mode = to_bytes(mode)
+        reading = b"r" in self.mode
+        if not reading and b"w" not in self.mode:
+            raise IOError("No 'r' or 'w' in mode %s" % str(self.mode))
+        self.from_path = False
+        # for htslib, wbu seems to not work
+        if mode == b"wbu":
+            mode = to_bytes(b"wb0")
+        if isinstance(fname, (basestring, Path)):
+            self.from_path = True
+            self.fname = to_bytes(str(fname))
+            if self.fname == b"-":
+                self.fname = to_bytes(b"/dev/stdin") if reading else to_bytes(b"/dev/stdout")
+            if self.fname.endswith(b".gz") and self.mode == b"w":
+                self.mode = b"wz"
+            elif self.fname.endswith((b".bcf", b".bcf.gz")) and self.mode == b"w":
+                self.mode = b"wb"
+            self.fname = to_bytes(str(fname))
+            self.mode = to_bytes(mode)
+            self.hts = hts_open(self.fname, self.mode)
+        # from a file descriptor
+        elif isinstance(fname, int):
+            self.mode = to_bytes(mode)
+            hf = hdopen(int(fname), self.mode)
+            self.hts = hts_hopen(hf, "<file>", self.mode)
+            self.fname = None
+        # reading from a File object or other object with fileno
+        elif hasattr(fname, "fileno"):
+            if fname.closed:
+                raise IOError('I/O operation on closed file')
+            self.mode = to_bytes(mode)
+            hf = hdopen(fname.fileno(), self.mode)
+            self.hts = hts_hopen(hf, "<file>", self.mode)
+            # .name can be TextIOWrapper
+            try:
+                self.fname = to_bytes(fname.name)
+            except AttributeError:
+                self.fname = None
+        else:
+            raise IOError("Cannot open '%s' for writing." % str(type(fname)))
+
+        if self.hts == NULL:
+            raise IOError("Error opening %s" % str(fname))
+        if reading:
+            if self.hts.format.format != vcf and self.hts.format.format != bcf:
+                raise IOError(
+                    "%s is not valid bcf or vcf (format: %s mode: %s)" % (fname, self.hts.format.format, mode)
+                )
+        else:
+            if self.hts.format.format != text_format and self.hts.format.format != binary_format:
+                raise IOError(
+                    "%s is not valid text_format or binary_format (format: %s mode: %s)" % (fname, self.hts.format.format, mode)
+                )
+
+    def close(self):
+        if self.hts != NULL:
+            if self.from_path:
+                hts_close(self.hts)
+            self.hts = NULL
+
+
+cdef class VCF(HTSFile):
     """
     VCF class holds methods to iterate over and query a VCF.
 
@@ -146,6 +229,8 @@ cdef class VCF:
         if True, then any '.' present in a genotype will classify the corresponding element in the gt_types array as UNKNOWN.
     samples: list
         list of samples to extract from full set in file.
+    threads: int
+        the number of threads to use including this reader.
 
 
     Returns
@@ -153,13 +238,11 @@ cdef class VCF:
     VCF object for iterating and querying.
     """
 
-    cdef htsFile *hts
     cdef const bcf_hdr_t *hdr
     cdef tbx_t *idx
     cdef hts_idx_t *hidx
     cdef int n_samples
     cdef int PASS
-    cdef bytes fname
     cdef bint gts012
     cdef bint lazy
     cdef bint strict_gt
@@ -178,25 +261,8 @@ cdef class VCF:
     cdef readonly int UNKNOWN
 
     def __init__(self, fname, mode="r", gts012=False, lazy=False, strict_gt=False, samples=None, threads=None):
-        cdef hFILE *hf
-
-        if isinstance(fname, basestring):
-            if fname == b"-" or fname == "-":
-                fname = b"/dev/stdin"
-            fname, mode = to_bytes(fname), to_bytes(mode)
-            self.hts = hts_open(fname, mode)
-            self.fname = fname
-        else:
-            mode = to_bytes(mode)
-            hf = hdopen(int(fname), mode)
-            self.hts = hts_hopen(hf, "<file>", mode)
-
-        if self.hts == NULL:
-            raise IOError("Error opening %s" % fname)
-        if self.hts.format.format != vcf and self.hts.format.format != bcf:
-            raise IOError("%s if not valid bcf or vcf" % fname)
-
         cdef bcf_hdr_t *hdr
+        self._open_htsfile(fname, mode)
         hdr = self.hdr = bcf_hdr_read(self.hts)
         if samples is not None:
             self.set_samples(samples)
@@ -367,7 +433,7 @@ cdef class VCF:
             yield from self
             raise StopIteration
 
-        if self.fname.decode(ENC).endswith('.bcf'):
+        if self.fname.decode(ENC).endswith(('.bcf', '.bcf.gz')):
             yield from self._bcf_region(region)
             raise StopIteration
 
@@ -491,13 +557,6 @@ cdef class VCF:
 
     contains = __contains__
 
-    def close(self):
-        if self.hts != NULL:
-            if self.fname != "-":
-                # TODO flush
-                hts_close(self.hts)
-            self.hts = NULL
-
     def __dealloc__(self):
         if self.hts != NULL and self.hdr != NULL:
             bcf_hdr_destroy(self.hdr)
@@ -533,11 +592,12 @@ cdef class VCF:
             return [str(self.hdr.samples[i].decode('utf-8')) for i in range(self.n_samples)]
 
     property raw_header:
-        "string of the raw header from the VCF"
-        def __get__(self):
-            cdef int hlen
-            s = bcf_hdr_fmt_text(self.hdr, 0, &hlen)
-            return from_bytes(s)
+         "string of the raw header from the VCF"
+         def __get__(self):
+             cdef kstring_t s
+             s.s, s.l, s.m = NULL, 0, 0
+             bcf_hdr_format(self.hdr, 0, &s)
+             return from_bytes(s.s)
 
     property seqlens:
         def __get__(self):
@@ -558,7 +618,7 @@ cdef class VCF:
             cdef char **cnames
             cdef int i, n = 0
             cnames = bcf_hdr_seqnames(self.hdr, &n)
-            if n == 0 and self.fname.decode(ENC).endswith('.bcf'):
+            if n == 0 and self.fname.decode(ENC).endswith(('.bcf', '.bcf.gz')):
                 if self.hidx == NULL:
                     self.hidx = bcf_index_load(self.fname)
                 if self.hidx != NULL:
@@ -1091,7 +1151,7 @@ cdef class Variant(object):
             cdef list d = [from_bytes(alleles[i]) for i in range(self.b.n_allele)]
             d.append(".") # -1 gives .
             cdef list bases = ["./." for _ in range(self.vcf.n_samples)]
-            cdef np.ndarray phased = self.gt_phases
+            cdef np.ndarray phased = (self.gt_phases).astype(int)
             cdef list lookup = ["/", "|"]
             cdef int unknown = 3 if self.vcf.gts012 else 2
             for i in range(0, n * self.vcf.n_samples, n):
@@ -1142,7 +1202,7 @@ cdef class Variant(object):
             return n
 
     property call_rate:
-        "proprtion of samples that were not UNKNOWN."
+        "proportion of samples that were not UNKNOWN."
         def __get__(self):
             if self.vcf.n_samples > 0:
                 return float(self.num_called) / self.vcf.n_samples
@@ -1287,7 +1347,7 @@ cdef class Variant(object):
         cdef int ndst = 0
         if bcf_get_genotypes(self.vcf.hdr, self.b, &gts, &ndst) <= 0:
             raise Exception("couldn't get genotypes for variant")
-        return newGenotypes(gts, ndst/self.vcf.n_samples, self.vcf.n_samples)
+        return newGenotypes(gts, int(ndst/self.vcf.n_samples), self.vcf.n_samples)
 
     @genotype.setter
     def genotype(self, Genotypes g):
@@ -1380,10 +1440,10 @@ cdef class Variant(object):
             size *= data.shape[1]
 
         cdef int ret
-        if np.issubdtype(data.dtype, np.int):
+        if np.issubdtype(data.dtype, np.signedinteger) or np.issubdtype(data.dtype, np.unsignedinteger):
             aint = data.astype(np.int32).reshape((size,))
             ret = bcf_update_format_int32(self.vcf.hdr, self.b, to_bytes(name), &aint[0], size)
-        elif np.issubdtype(data.dtype, np.float):
+        elif np.issubdtype(data.dtype, np.floating):
             afloat = data.astype(np.float32).reshape((size,))
             ret = bcf_update_format_float(self.vcf.hdr, self.b, to_bytes(name), &afloat[0], size)
         else:
@@ -1404,7 +1464,7 @@ cdef class Variant(object):
             if self._gt_types == NULL:
                 self._gt_phased = <int *>stdlib.malloc(sizeof(int) * self.vcf.n_samples)
                 ngts = bcf_get_genotypes(self.vcf.hdr, self.b, &self._gt_types, &ndst)
-                nper = ndst / self.vcf.n_samples
+                nper = int(ndst / self.vcf.n_samples)
                 self._ploidy = nper
                 self._gt_idxs = <int *>stdlib.malloc(sizeof(int) * self.vcf.n_samples * nper)
                 if ndst == 0 or nper == 0:
@@ -1460,7 +1520,7 @@ cdef class Variant(object):
                         if self._gt_pls[i] < 0:
                             self._gt_pls[i] = imax
 
-                self._gt_nper = nret / self.vcf.n_samples
+                self._gt_nper = int(nret / self.vcf.n_samples)
             cdef np.npy_intp shape[1]
             shape[0] = <np.npy_intp> self._gt_nper * self.vcf.n_samples
             if self._gt_pls != NULL:
@@ -1534,7 +1594,7 @@ cdef class Variant(object):
                 # GATK
                 nret = bcf_get_format_int32(self.vcf.hdr, self.b, "AD", &self._gt_ref_depths, &ndst)
                 if nret > 0:
-                    nper = nret / self.vcf.n_samples
+                    nper = int(nret / self.vcf.n_samples)
                     if nper == 1:
                         stdlib.free(self._gt_ref_depths); self._gt_ref_depths = NULL
                         return -1 + np.zeros(self.vcf.n_samples, np.int32)
@@ -1575,7 +1635,7 @@ cdef class Variant(object):
                 # GATK
                 nret = bcf_get_format_int32(self.vcf.hdr, self.b, "AD", &self._gt_alt_depths, &ndst)
                 if nret > 0:
-                    nper = nret / self.vcf.n_samples
+                    nper = int(nret / self.vcf.n_samples)
                     if nper == 1:
                         stdlib.free(self._gt_alt_depths); self._gt_alt_depths = NULL
                         return (-1 + np.zeros(self.vcf.n_samples, np.int32))
@@ -1590,7 +1650,7 @@ cdef class Variant(object):
                 elif nret == -1:
                     # Freebayes
                     nret = bcf_get_format_int32(self.vcf.hdr, self.b, "AO", &self._gt_alt_depths, &ndst)
-                    nper = nret / self.vcf.n_samples
+                    nper = int(nret / self.vcf.n_samples)
                     if nret < 0:
                         stdlib.free(self._gt_alt_depths); self._gt_alt_depths = NULL
                         return -1 + np.zeros(self.vcf.n_samples, np.int32)
@@ -1684,7 +1744,7 @@ cdef class Variant(object):
             return depth
 
     property gt_phases:
-        """get a boolean indicating wether each sample is phased as a numpy array."""
+        """get a boolean indicating whether each sample is phased as a numpy array."""
         def __get__(self):
             # run for side-effect
             if self._gt_phased == NULL:
@@ -2154,6 +2214,7 @@ cdef from_bytes(s):
     return s
 
 
+# TODO: make Writer extend HTSFile not VCF by moving common methods into HTSFile
 cdef class Writer(VCF):
     """
     Writer class makes a VCF Writer.
@@ -2176,15 +2237,7 @@ cdef class Writer(VCF):
     cdef const bcf_hdr_t *ohdr
 
     def __init__(Writer self, fname, VCF tmpl, mode="w"):
-        self.name = to_bytes(fname)
-        if fname.endswith(".gz") and mode == "w":
-            mode = "wz"
-        if fname.endswith(".bcf") and mode == "w":
-            mode = "wb"
-        self.hts = hts_open(self.name, to_bytes(mode))
-        if self.hts == NULL:
-            raise Exception("error opening file: %s" % self.name)
-
+        self._open_htsfile(fname, mode)
         bcf_hdr_sync(tmpl.hdr)
         self.ohdr = tmpl.hdr
         self.hdr = bcf_hdr_dup(tmpl.hdr)
@@ -2194,14 +2247,8 @@ cdef class Writer(VCF):
     @classmethod
     def from_string(Writer cls, fname, header_string, mode="w"):
         cdef Writer self = Writer.__new__(Writer)
-
-        self.name = to_bytes(fname)
-        if fname.endswith(".gz") and mode == "w":
-            mode = "wz"
-        if fname.endswith(".bcf") and mode == "w":
-            mode = "wb"
-        self.hts = hts_open(self.name, to_bytes(mode))
         cdef char *hmode = "w"
+        self._open_htsfile(fname, mode)
         self.hdr = bcf_hdr_init(hmode)
         self.ohdr = bcf_hdr_dup(self.hdr)
         if bcf_hdr_parse(self.hdr, to_bytes(header_string)) != 0:


=====================================
cyvcf2/tests/test_reader.py
=====================================
@@ -1,5 +1,5 @@
 from __future__ import print_function
-from cyvcf2 import VCF, Variant, Writer
+from ..cyvcf2 import VCF, Variant, Writer
 import numpy as np
 import os.path
 from nose.tools import assert_raises
@@ -7,6 +7,11 @@ import tempfile
 import sys
 import os
 import atexit
+try:
+  from pathlib import Path
+except ImportError:
+  from pathlib2 import Path  # python 2 backport
+
 
 HERE = os.path.dirname(__file__)
 VCF_PATH = os.path.join(HERE, "test.vcf.gz")
@@ -20,8 +25,29 @@ except NameError:
     basestring = (str, bytes)
 
 def test_init():
+    # string
     v = VCF(VCF_PATH)
     assert v
+    expected_count = sum(1 for _ in v)
+    v.close()
+
+    # Path
+    v = VCF(Path(VCF_PATH))
+    value = sum(1 for _ in v)
+    assert value == expected_count
+
+    # file descriptor
+    with open(VCF_PATH) as fp:
+        fd = fp.fileno()
+        v = VCF(fd)
+        assert sum(1 for _ in v) == expected_count
+        v.close()  # this should not close the file descriptor originally opened
+
+    # file-like object
+    with open(VCF_PATH) as fp:
+        v = VCF(fp)
+        assert sum(1 for _ in v) == expected_count
+        v.close()  # this should not close the file descriptor originally opened
 
 def test_type():
     vcf = VCF(VCF_PATH)
@@ -237,32 +263,46 @@ def test_writer_from_string():
     w.close()
 
 
-def test_writer():
-
-    v = VCF(VCF_PATH)
-    f = tempfile.mktemp(suffix=".vcf")
-    atexit.register(os.unlink, f)
-
-    o = Writer(f, v)
-    rec = next(v)
+def run_writer(writer, filename, rec):
     rec.INFO["AC"] = "3"
     rec.FILTER = ["LowQual"]
-    o.write_record(rec)
+    writer.write_record(rec)
 
     rec.FILTER = ["LowQual", "VQSRTrancheSNP99.90to100.00"]
-    o.write_record(rec)
-
+    writer.write_record(rec)
 
     rec.FILTER = "PASS"
-    o.write_record(rec)
+    writer.write_record(rec)
 
-    o.close()
+    writer.close()
 
     expected = ["LowQual", "LowQual;VQSRTrancheSNP99.90to100.00", None]
 
-    for i, variant in enumerate(VCF(f)):
+    for i, variant in enumerate(VCF(filename)):
         assert variant.FILTER == expected[i], (variant.FILTER, expected[i])
 
+def test_writer():
+    v = VCF(VCF_PATH)
+    f = tempfile.mktemp(suffix=".vcf")
+    atexit.register(os.unlink, f)
+    rec = next(v)
+
+    # string
+    run_writer(Writer(f, v), f, rec)
+
+    # Path
+    path = Path(f)
+    run_writer(Writer(path, v), f, rec)
+
+    # file descriptor
+    with open(VCF_PATH) as fp:
+        fd = fp.fileno()
+        run_writer(Writer(fd, v), f, rec)
+
+    # file-like object
+    with open(VCF_PATH) as fp:
+        run_writer(Writer(fp, v), f, rec)
+
 def test_add_info_to_header():
     v = VCF(VCF_PATH)
     v.add_info_to_header({'ID': 'abcdefg', 'Description': 'abcdefg',
@@ -583,7 +623,7 @@ def test_set_format_float():
     vcf = VCF('{}/test-format-string.vcf'.format(HERE))
     assert vcf.add_format_to_header(dict(ID="PS", Number=1, Type="Float", Description="PS example")) == 0
     v = next(vcf)
-    v.set_format("PS", np.array([0.555, 1.111], dtype=np.float))
+    v.set_format("PS", np.array([0.555, 1.111], dtype=np.float32))
     assert allclose(fmap(float, get_gt_str(v, "PS")), np.array([0.555, 1.111]))
 
     v.set_format("PS", np.array([8.555, 11.111], dtype=np.float64))
@@ -876,18 +916,18 @@ def test_strict_gt_option_flag():
 
     msg = "VCF(gts012=False, strict_gt=False) not working"
     truth_gt_types = (0, 3, 1, 1, 1, 1, 0, 0, 2)
-    assert tuple(variant.gt_bases.tolist()) == truth_gt_bases, '{} [gt_bases]'.format(msg)
-    assert tuple(variant.gt_types.tolist()) == truth_gt_types, '{} [gt_types]'.format(msg)
-    assert tuple(variant.genotypes) == truth_genotypes, '{} (genotypes)'.format(msg)
+    assert bool(tuple(variant.gt_bases.tolist()) == truth_gt_bases), '{} [gt_bases]'.format(msg)
+    assert bool(tuple(variant.gt_types.tolist()) == truth_gt_types), '{} [gt_types]'.format(msg)
+    assert bool(tuple(variant.genotypes) == truth_genotypes), '{} (genotypes)'.format(msg)
 
     vcf = VCF(test_vcf, gts012=False, strict_gt=True)
     variant = next(vcf)
 
     msg = "VCF(gts012=False, strict_gt=True) not working"
     truth_gt_types = (0, 3, 1, 1, 2, 2, 2, 2, 2)
-    assert tuple(variant.gt_bases.tolist()) == truth_gt_bases, '{} [gt_bases]'.format(msg)
-    assert tuple(variant.gt_types.tolist()) == truth_gt_types, '{} [gt_types]'.format(msg)
-    assert tuple(variant.genotypes) == truth_genotypes, '{} (genotypes)'.format(msg)
+    assert bool(tuple(variant.gt_bases.tolist()) == truth_gt_bases), '{} [gt_bases]'.format(msg)
+    assert bool(tuple(variant.gt_types.tolist()) == truth_gt_types), '{} [gt_types]'.format(msg)
+    assert bool(tuple(variant.genotypes) == truth_genotypes), '{} (genotypes)'.format(msg)
 
 
     vcf = VCF(test_vcf, gts012=True)


=====================================
debian/patches/add_htslib_link_dependency.patch
=====================================
@@ -1,18 +1,34 @@
-Author: Liubov Chuprikova <chuprikovalv at gmail.com>
-Last-Update: Fri, 21 December 2018 16:40:42 +0100
-Description: Add missing link dependency on htslib
+Author: Liubov Chuprikova <chuprikovalv at gmail.com>, Nilesh Patra <npatra974 at gmail.com>
+Last-Update: Thu, 9 July 2020 22:59:25 +0530
+Description: Add missing link dependency on htslib, cythonize modules
 
 
-Index: cyvcf2-0.10.4/setup.py
-===================================================================
---- cyvcf2-0.10.4.orig/setup.py
-+++ cyvcf2-0.10.4/setup.py
-@@ -67,7 +67,7 @@ from Cython.Distutils import build_ext
- cmdclass = {'build_ext': build_ext}
- extension = [Extension("cyvcf2.cyvcf2",
+--- a/setup.py
++++ b/setup.py
+@@ -56,15 +56,14 @@
+ 
+ extensions = [Extension("cyvcf2.cyvcf2",
                          ["cyvcf2/cyvcf2.pyx"] + sources,
 -                        libraries=['z', 'bz2', 'lzma', 'curl', 'ssl'] + (['crypt'] if platform.system() != 'Darwin' else []),
 +                        libraries=['z', 'bz2', 'lzma', 'curl', 'ssl', 'hts'] + (['crypt'] if platform.system() != 'Darwin' else []),
+                         extra_compile_args=["-Wno-sign-compare", "-Wno-unused-function",
+                             "-Wno-strict-prototypes",
+                             "-Wno-unused-result", "-Wno-discarded-qualifiers"],
                          include_dirs=['htslib', 'cyvcf2', np.get_include()])]
  
  
+-CYTHONIZE = bool(int(os.getenv("CYTHONIZE", 0)))
+-if CYTHONIZE:
++if "clean" not in sys.argv:
+     try:
+         from Cython.Build import cythonize
+     except ImportError:
+@@ -75,8 +74,6 @@
+         sys.exit(1)
+     compiler_directives = {"language_level": 2, "embedsignature": True}
+     extensions = cythonize(extensions, compiler_directives=compiler_directives)
+-else:
+-    extensions = no_cythonize(extensions)
+ 
+ 
+ setup(


=====================================
docs/source/conf.py
=====================================
@@ -53,7 +53,7 @@ master_doc = 'index'
 
 # General information about the project.
 project = u'cyvcf2'
-copyright = u'2016, Brent Pedersen'
+copyright = u'2016..2020, Brent Pedersen'
 author = u'Brent Pedersen'
 
 # The version info for the project you're documenting, acts as replacement for


=====================================
docs/source/index.rst
=====================================
@@ -60,7 +60,7 @@ Modifying Existing Records
 ==========================
 
 `cyvcf2` is optimized for fast reading and extraction from existing files.
-However, it also offers some means of modifying existing VCFs. Here, wrapper
+However, it also offers some means of modifying existing VCFs. Here, we
 show an example of how to annotate variants with the genes that they overlap.
 
 
@@ -74,7 +74,8 @@ show an example of how to annotate variants with the genes that they overlap.
         'Type':'Character', 'Number': '1'})
 
     # create a new vcf Writer using the input vcf as a template.
-    w = Writer(f, vcf)
+    fname = "out.vcf"
+    w = Writer(fname, vcf)
 
     for v in vcf:
         # The get_gene_intersections function is not shown.


=====================================
setup.py
=====================================
@@ -1,14 +1,20 @@
-from setuptools import setup, Extension
 import os
 import glob
 import sys
 import subprocess
+import platform
+
 import pkg_resources
+from setuptools import setup, Extension, dist
 
 if sys.version_info.major == 2 and sys.version_info.minor != 7:
     sys.stderr.write("ERROR: cyvcf2 is only for python 2.7 or greater you are running %d.%d\n", (sys.version_info.major, sys.version_info.minor))
     sys.exit(1)
 
+# Install numpy right now
+dist.Distribution().fetch_build_eggs(['numpy'])
+import numpy as np
+
 
 def get_version():
     """Get the version info from the mpld3 package without importing it"""
@@ -26,51 +32,53 @@ def get_version():
           raise ValueError("version could not be located")
 
 
-# Temporarily install dependencies required by setup.py before trying to import them.
-# From https://bitbucket.org/dholth/setup-requires
-
-sys.path[0:0] = ['setup-requires']
-pkg_resources.working_set.add_entry('setup-requires')
-
-
-def missing_requirements(specifiers):
-    for specifier in specifiers:
-        try:
-            pkg_resources.require(specifier)
-        except pkg_resources.DistributionNotFound:
-            yield specifier
-
-
-def install_requirements(specifiers):
-    to_install = list(specifiers)
-    if to_install:
-        cmd = [sys.executable, "-m", "pip", "install",
-            "-t", "setup-requires"] + to_install
-        subprocess.call(cmd)
-
-
-requires = ['cython', 'numpy', 'coloredlogs', 'click']
-install_requirements(missing_requirements(requires))
-
-
-excludes = ['irods', 'plugin']
-
-sources = [x for x in glob.glob('htslib/*.c') if not any(e in x for e in excludes)] + glob.glob('htslib/cram/*.c')
-# these have main()'s
+def no_cythonize(extensions, **_ignore):
+    for extension in extensions:
+        sources = []
+        for sfile in extension.sources:
+            path, ext = os.path.splitext(sfile)
+            if ext in (".pyx", ".py"):
+                sfile = path + ".c"
+            sources.append(sfile)
+        extension.sources[:] = sources
+    return extensions
+
+
+# Build the Cython extension by statically linking to the bundled htslib
+sources = [
+    x for x in glob.glob('htslib/*.c') 
+    if not any(e in x for e in ['irods', 'plugin'])
+]
+sources += glob.glob('htslib/cram/*.c')
+# Exclude the htslib sources containing main()'s
 sources = [x for x in sources if not x.endswith(('htsfile.c', 'tabix.c', 'bgzip.c'))]
 sources.append('cyvcf2/helpers.c')
 
-import numpy as np
-import platform
-from Cython.Distutils import build_ext
-
-cmdclass = {'build_ext': build_ext}
-extension = [Extension("cyvcf2.cyvcf2",
+extensions = [Extension("cyvcf2.cyvcf2",
                         ["cyvcf2/cyvcf2.pyx"] + sources,
                         libraries=['z', 'bz2', 'lzma', 'curl', 'ssl'] + (['crypt'] if platform.system() != 'Darwin' else []),
+                        extra_compile_args=["-Wno-sign-compare", "-Wno-unused-function",
+                            "-Wno-strict-prototypes",
+                            "-Wno-unused-result", "-Wno-discarded-qualifiers"],
                         include_dirs=['htslib', 'cyvcf2', np.get_include()])]
 
 
+CYTHONIZE = bool(int(os.getenv("CYTHONIZE", 0)))
+if CYTHONIZE:
+    try:
+        from Cython.Build import cythonize
+    except ImportError:
+        sys.stderr.write(
+            "Cannot find Cython. Have you installed all the requirements?\n"
+            "Try pip install -r requirements.txt\n"
+        )
+        sys.exit(1)
+    compiler_directives = {"language_level": 2, "embedsignature": True}
+    extensions = cythonize(extensions, compiler_directives=compiler_directives)
+else:
+    extensions = no_cythonize(extensions)
+
+
 setup(
     name="cyvcf2",
     description="fast vcf parsing with cython + htslib",
@@ -81,8 +89,7 @@ setup(
     author="Brent Pedersen",
     author_email="bpederse at gmail.com",
     version=get_version(),
-    cmdclass=cmdclass,
-    ext_modules=extension,
+    ext_modules=extensions,
     packages=['cyvcf2', 'cyvcf2.tests'],
     entry_points=dict(
         console_scripts=[
@@ -91,7 +98,7 @@ setup(
     ),
     test_suite='nose.collector',
     tests_require='nose',
-    install_requires=['numpy'],
+    install_requires=['numpy', 'coloredlogs', 'click'],
     include_package_data=True,
     zip_safe=False,
 )



View it on GitLab: https://salsa.debian.org/med-team/cyvcf2/-/compare/f2b112fd7a8ae0259fdcc34fef17f32373f25049...b3994365416c67e8376bda0b872e0c468fd6e925

-- 
View it on GitLab: https://salsa.debian.org/med-team/cyvcf2/-/compare/f2b112fd7a8ae0259fdcc34fef17f32373f25049...b3994365416c67e8376bda0b872e0c468fd6e925
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20200709/683c61cc/attachment-0001.html>


More information about the debian-med-commit mailing list