[med-svn] [Git][med-team/cyvcf2][upstream] New upstream version 0.20.1
Nilesh Patra
gitlab at salsa.debian.org
Thu Jul 9 18:31:23 BST 2020
Nilesh Patra pushed to branch upstream at Debian Med / cyvcf2
Commits:
09dacf19 by Nilesh Patra at 2020-07-09T22:36:55+05:30
New upstream version 0.20.1
- - - - -
10 changed files:
- .gitignore
- .travis.yml
- README.md
- cyvcf2/__init__.py
- cyvcf2/cyvcf2.pxd
- cyvcf2/cyvcf2.pyx
- cyvcf2/tests/test_reader.py
- docs/source/conf.py
- docs/source/index.rst
- setup.py
Changes:
=====================================
.gitignore
=====================================
@@ -18,3 +18,4 @@ _static
_templates
setup-requires/*
.cache/v/cache/lastfailed
+.idea
=====================================
.travis.yml
=====================================
@@ -15,7 +15,7 @@ install:
- ./configure --enable-s3 --disable-lzma --disable-bz2
- make
- cd ..
- - python setup.py install
+ - CYTHONIZE=1 python setup.py install
script:
- python setup.py test
=====================================
README.md
=====================================
@@ -1,6 +1,8 @@
cyvcf2
======
+Note: cyvcf2 versions < 0.20.0 require htslib < 1.10. cyvcf2 versions >= 0.20.0 require htslib >= 1.10
+
<!-- ghp-import -p docs/build/html/ -->
[![Docs](https://img.shields.io/badge/docs-latest-blue.svg)](http://brentp.github.io/cyvcf2/)
@@ -9,7 +11,7 @@ If you use cyvcf2, please cite the [paper](https://academic.oup.com/bioinformati
Fast python **(2 and 3)** parsing of VCF and BCF including region-queries.
-[![Build Status](https://travis-ci.org/brentp/cyvcf2.svg?branch=master)](https://travis-ci.org/brentp/cyvcf2)
+[![Build Status](https://travis-ci.com/brentp/cyvcf2.svg?branch=master)](https://travis-ci.com/brentp/cyvcf2)
cyvcf2 is a cython wrapper around [htslib](https://github.com/samtools/htslib) built for fast parsing of [Variant Call Format](https://en.m.wikipedia.org/wiki/Variant_Call_Format) (VCF) files.
@@ -68,7 +70,7 @@ for v in vcf('11:435345-556565'):
Installation
============
-## pip
+## pip (assuming you have htslib < 1.10 installed)
```
pip install cyvcf2
```
@@ -84,7 +86,7 @@ autoconf
make
cd ..
-pip install -e .
+CYTHONIZE=1 pip install -e .
```
On **OSX**, using brew, you may have to set the following as indicated by the brew install:
=====================================
cyvcf2/__init__.py
=====================================
@@ -2,4 +2,4 @@ from .cyvcf2 import (VCF, Variant, Writer, r_ as r_unphased, par_relatedness,
par_het)
Reader = VCFReader = VCF
-__version__ = "0.11.6"
+__version__ = "0.20.1"
=====================================
cyvcf2/cyvcf2.pxd
=====================================
@@ -1,4 +1,4 @@
-from libc.stdint cimport int32_t, uint32_t, int8_t, int16_t, uint8_t
+from libc.stdint cimport int64_t, int32_t, uint32_t, int8_t, int16_t, uint8_t
import numpy as np
cimport numpy as np
np.import_array()
@@ -23,7 +23,7 @@ cdef extern from "htslib/kstring.h":
size_t l, m;
char *s;
- inline char *ks_release(kstring_t *s)
+ char *ks_release(kstring_t *s)
cdef extern from "htslib/hfile.h":
ctypedef struct hFILE:
@@ -79,7 +79,7 @@ cdef extern from "htslib/tbx.h":
tbx_t *tbx_index_load(const char *fn);
tbx_t *tbx_index_load2(const char *fn, const char *fnidx);
- hts_itr_t *tbx_itr_queryi(tbx_t *tbx, int tid, int beg, int end)
+ hts_itr_t *tbx_itr_queryi(tbx_t *tbx, int tid, int64_t beg, int64_t end)
hts_itr_t *tbx_itr_querys(tbx_t *tbx, char *reg) nogil
int tbx_itr_next(htsFile *fp, tbx_t *tbx, hts_itr_t *iter, void *data) nogil;
void tbx_destroy(tbx_t *tbx);
@@ -109,8 +109,6 @@ cdef extern from "htslib/vcf.h":
const int BCF_BT_FLOAT = 5
const int BCF_BT_CHAR = 7
- const int BCF_HT_FLAG = 0
-
const int bcf_str_missing = 0x07
const int bcf_str_vector_end = 0
@@ -139,13 +137,14 @@ cdef extern from "htslib/vcf.h":
ctypedef struct bcf_info_t:
int key; # key: numeric tag id, the corresponding string is bcf_hdr_t::id[BCF_DT_ID][$key].key
- int type, len; # type: one of BCF_BT_* types; len: vector length, 1 for scalars
+ int type; # type: one of BCF_BT_* types; len: vector length, 1 for scalars
#} v1; # only set if $len==1; for easier access
uv1 v1
uint8_t *vptr; # pointer to data array in bcf1_t->shared.s, excluding the size+type and tag id bytes
uint32_t vptr_len; # length of the vptr block or, when set, of the vptr_mod block, excluding offset
uint32_t vptr_off;
uint32_t vptr_free; # vptr offset, i.e., the size of the INFO key plus size+type bytes
+ int len;
# indicates that vptr-vptr_off must be freed; set only when modified and the new
@@ -164,12 +163,12 @@ cdef extern from "htslib/vcf.h":
int indiv_dirty; # if set, indiv.s must be recreated on BCF output
ctypedef struct bcf1_t:
+ int64_t pos; #// POS
+ int64_t rlen; #// length of REF
int32_t rid; #// CHROM
- int32_t pos; #// POS
- int32_t rlen; #// length of REF
float qual; #// QUAL
uint32_t n_info, n_allele;
- uint8_t n_fmt #//:8 #//, n_sample:24;
+ uint32_t n_fmt #//:8 #//, n_sample:24;
#kstring_t shared, indiv;
bcf_dec_t d; #// lazy evaluation: $d is not generated by bcf_read(), but by explicitly calling bcf_unpack()
int max_unpack; # // Set to BCF_UN_STR, BCF_UN_FLT, or BCF_UN_INFO to boost performance of vcf_parse when some of the fields wont be needed
@@ -215,6 +214,7 @@ cdef extern from "htslib/vcf.h":
int nsamples_ori; # for bcf_hdr_set_samples()
uint8_t *keep_samples;
kstring_t mem;
+ int32_t m[3];
void bcf_float_set(float *ptr, uint32_t value)
@@ -235,6 +235,8 @@ cdef extern from "htslib/vcf.h":
int bcf_hdr_nsamples(const bcf_hdr_t *hdr);
void bcf_hdr_destroy(const bcf_hdr_t *hdr)
char *bcf_hdr_fmt_text(const bcf_hdr_t *hdr, int is_bcf, int *len);
+ int bcf_hdr_format(const bcf_hdr_t *hdr, int is_bcf, kstring_t *str);
+
bcf_hdr_t *bcf_hdr_init(const char *mode);
int bcf_hdr_parse(bcf_hdr_t *hdr, char *htxt);
@@ -280,7 +282,7 @@ cdef extern from "htslib/vcf.h":
## genotypes
- inline void bcf_gt2alleles(int igt, int *a, int *b);
+ void bcf_gt2alleles(int igt, int *a, int *b);
int bcf_update_genotypes(const bcf_hdr_t *hdr, bcf1_t *line, const void *values, int n);
# idx is 0 for ref, 1... for alts...
int bcf_gt_phased(int idx);
=====================================
cyvcf2/cyvcf2.pyx
=====================================
@@ -11,7 +11,10 @@ import numpy as np
from array import array
import math
import ctypes
-
+try:
+ from pathlib import Path
+except ImportError:
+ from pathlib2 import Path # python 2 backport
from libc cimport stdlib
cimport numpy as np
@@ -130,7 +133,87 @@ cdef set_constants(VCF v):
v.HOM_ALT = 3
-cdef class VCF:
+
+cdef class HTSFile:
+
+ cdef htsFile *hts
+ cdef bytes fname
+ cdef bytes mode
+ cdef bint from_path
+
+ cdef _open_htsfile(self, fname, mode):
+ """Opens an htsfile for reading or writing.
+
+ Parameters
+ ----------
+ fname: str
+ filename (str or Path), file descriptor (int), or file-like object (has fileno method).
+ mode: str
+ the mode to pass to hts_open.
+ """
+ cdef hFILE *hf
+ self.mode = to_bytes(mode)
+ reading = b"r" in self.mode
+ if not reading and b"w" not in self.mode:
+ raise IOError("No 'r' or 'w' in mode %s" % str(self.mode))
+ self.from_path = False
+ # for htslib, wbu seems to not work
+ if mode == b"wbu":
+ mode = to_bytes(b"wb0")
+ if isinstance(fname, (basestring, Path)):
+ self.from_path = True
+ self.fname = to_bytes(str(fname))
+ if self.fname == b"-":
+ self.fname = to_bytes(b"/dev/stdin") if reading else to_bytes(b"/dev/stdout")
+ if self.fname.endswith(b".gz") and self.mode == b"w":
+ self.mode = b"wz"
+ elif self.fname.endswith((b".bcf", b".bcf.gz")) and self.mode == b"w":
+ self.mode = b"wb"
+ self.fname = to_bytes(str(fname))
+ self.mode = to_bytes(mode)
+ self.hts = hts_open(self.fname, self.mode)
+ # from a file descriptor
+ elif isinstance(fname, int):
+ self.mode = to_bytes(mode)
+ hf = hdopen(int(fname), self.mode)
+ self.hts = hts_hopen(hf, "<file>", self.mode)
+ self.fname = None
+ # reading from a File object or other object with fileno
+ elif hasattr(fname, "fileno"):
+ if fname.closed:
+ raise IOError('I/O operation on closed file')
+ self.mode = to_bytes(mode)
+ hf = hdopen(fname.fileno(), self.mode)
+ self.hts = hts_hopen(hf, "<file>", self.mode)
+ # .name can be TextIOWrapper
+ try:
+ self.fname = to_bytes(fname.name)
+ except AttributeError:
+ self.fname = None
+ else:
+ raise IOError("Cannot open '%s' for writing." % str(type(fname)))
+
+ if self.hts == NULL:
+ raise IOError("Error opening %s" % str(fname))
+ if reading:
+ if self.hts.format.format != vcf and self.hts.format.format != bcf:
+ raise IOError(
+ "%s is not valid bcf or vcf (format: %s mode: %s)" % (fname, self.hts.format.format, mode)
+ )
+ else:
+ if self.hts.format.format != text_format and self.hts.format.format != binary_format:
+ raise IOError(
+ "%s is not valid text_format or binary_format (format: %s mode: %s)" % (fname, self.hts.format.format, mode)
+ )
+
+ def close(self):
+ if self.hts != NULL:
+ if self.from_path:
+ hts_close(self.hts)
+ self.hts = NULL
+
+
+cdef class VCF(HTSFile):
"""
VCF class holds methods to iterate over and query a VCF.
@@ -146,6 +229,8 @@ cdef class VCF:
if True, then any '.' present in a genotype will classify the corresponding element in the gt_types array as UNKNOWN.
samples: list
list of samples to extract from full set in file.
+ threads: int
+ the number of threads to use including this reader.
Returns
@@ -153,13 +238,11 @@ cdef class VCF:
VCF object for iterating and querying.
"""
- cdef htsFile *hts
cdef const bcf_hdr_t *hdr
cdef tbx_t *idx
cdef hts_idx_t *hidx
cdef int n_samples
cdef int PASS
- cdef bytes fname
cdef bint gts012
cdef bint lazy
cdef bint strict_gt
@@ -178,25 +261,8 @@ cdef class VCF:
cdef readonly int UNKNOWN
def __init__(self, fname, mode="r", gts012=False, lazy=False, strict_gt=False, samples=None, threads=None):
- cdef hFILE *hf
-
- if isinstance(fname, basestring):
- if fname == b"-" or fname == "-":
- fname = b"/dev/stdin"
- fname, mode = to_bytes(fname), to_bytes(mode)
- self.hts = hts_open(fname, mode)
- self.fname = fname
- else:
- mode = to_bytes(mode)
- hf = hdopen(int(fname), mode)
- self.hts = hts_hopen(hf, "<file>", mode)
-
- if self.hts == NULL:
- raise IOError("Error opening %s" % fname)
- if self.hts.format.format != vcf and self.hts.format.format != bcf:
- raise IOError("%s if not valid bcf or vcf" % fname)
-
cdef bcf_hdr_t *hdr
+ self._open_htsfile(fname, mode)
hdr = self.hdr = bcf_hdr_read(self.hts)
if samples is not None:
self.set_samples(samples)
@@ -367,7 +433,7 @@ cdef class VCF:
yield from self
raise StopIteration
- if self.fname.decode(ENC).endswith('.bcf'):
+ if self.fname.decode(ENC).endswith(('.bcf', '.bcf.gz')):
yield from self._bcf_region(region)
raise StopIteration
@@ -491,13 +557,6 @@ cdef class VCF:
contains = __contains__
- def close(self):
- if self.hts != NULL:
- if self.fname != "-":
- # TODO flush
- hts_close(self.hts)
- self.hts = NULL
-
def __dealloc__(self):
if self.hts != NULL and self.hdr != NULL:
bcf_hdr_destroy(self.hdr)
@@ -533,11 +592,12 @@ cdef class VCF:
return [str(self.hdr.samples[i].decode('utf-8')) for i in range(self.n_samples)]
property raw_header:
- "string of the raw header from the VCF"
- def __get__(self):
- cdef int hlen
- s = bcf_hdr_fmt_text(self.hdr, 0, &hlen)
- return from_bytes(s)
+ "string of the raw header from the VCF"
+ def __get__(self):
+ cdef kstring_t s
+ s.s, s.l, s.m = NULL, 0, 0
+ bcf_hdr_format(self.hdr, 0, &s)
+ return from_bytes(s.s)
property seqlens:
def __get__(self):
@@ -558,7 +618,7 @@ cdef class VCF:
cdef char **cnames
cdef int i, n = 0
cnames = bcf_hdr_seqnames(self.hdr, &n)
- if n == 0 and self.fname.decode(ENC).endswith('.bcf'):
+ if n == 0 and self.fname.decode(ENC).endswith(('.bcf', '.bcf.gz')):
if self.hidx == NULL:
self.hidx = bcf_index_load(self.fname)
if self.hidx != NULL:
@@ -1091,7 +1151,7 @@ cdef class Variant(object):
cdef list d = [from_bytes(alleles[i]) for i in range(self.b.n_allele)]
d.append(".") # -1 gives .
cdef list bases = ["./." for _ in range(self.vcf.n_samples)]
- cdef np.ndarray phased = self.gt_phases
+ cdef np.ndarray phased = (self.gt_phases).astype(int)
cdef list lookup = ["/", "|"]
cdef int unknown = 3 if self.vcf.gts012 else 2
for i in range(0, n * self.vcf.n_samples, n):
@@ -1142,7 +1202,7 @@ cdef class Variant(object):
return n
property call_rate:
- "proprtion of samples that were not UNKNOWN."
+ "proportion of samples that were not UNKNOWN."
def __get__(self):
if self.vcf.n_samples > 0:
return float(self.num_called) / self.vcf.n_samples
@@ -1287,7 +1347,7 @@ cdef class Variant(object):
cdef int ndst = 0
if bcf_get_genotypes(self.vcf.hdr, self.b, &gts, &ndst) <= 0:
raise Exception("couldn't get genotypes for variant")
- return newGenotypes(gts, ndst/self.vcf.n_samples, self.vcf.n_samples)
+ return newGenotypes(gts, int(ndst/self.vcf.n_samples), self.vcf.n_samples)
@genotype.setter
def genotype(self, Genotypes g):
@@ -1380,10 +1440,10 @@ cdef class Variant(object):
size *= data.shape[1]
cdef int ret
- if np.issubdtype(data.dtype, np.int):
+ if np.issubdtype(data.dtype, np.signedinteger) or np.issubdtype(data.dtype, np.unsignedinteger):
aint = data.astype(np.int32).reshape((size,))
ret = bcf_update_format_int32(self.vcf.hdr, self.b, to_bytes(name), &aint[0], size)
- elif np.issubdtype(data.dtype, np.float):
+ elif np.issubdtype(data.dtype, np.floating):
afloat = data.astype(np.float32).reshape((size,))
ret = bcf_update_format_float(self.vcf.hdr, self.b, to_bytes(name), &afloat[0], size)
else:
@@ -1404,7 +1464,7 @@ cdef class Variant(object):
if self._gt_types == NULL:
self._gt_phased = <int *>stdlib.malloc(sizeof(int) * self.vcf.n_samples)
ngts = bcf_get_genotypes(self.vcf.hdr, self.b, &self._gt_types, &ndst)
- nper = ndst / self.vcf.n_samples
+ nper = int(ndst / self.vcf.n_samples)
self._ploidy = nper
self._gt_idxs = <int *>stdlib.malloc(sizeof(int) * self.vcf.n_samples * nper)
if ndst == 0 or nper == 0:
@@ -1460,7 +1520,7 @@ cdef class Variant(object):
if self._gt_pls[i] < 0:
self._gt_pls[i] = imax
- self._gt_nper = nret / self.vcf.n_samples
+ self._gt_nper = int(nret / self.vcf.n_samples)
cdef np.npy_intp shape[1]
shape[0] = <np.npy_intp> self._gt_nper * self.vcf.n_samples
if self._gt_pls != NULL:
@@ -1534,7 +1594,7 @@ cdef class Variant(object):
# GATK
nret = bcf_get_format_int32(self.vcf.hdr, self.b, "AD", &self._gt_ref_depths, &ndst)
if nret > 0:
- nper = nret / self.vcf.n_samples
+ nper = int(nret / self.vcf.n_samples)
if nper == 1:
stdlib.free(self._gt_ref_depths); self._gt_ref_depths = NULL
return -1 + np.zeros(self.vcf.n_samples, np.int32)
@@ -1575,7 +1635,7 @@ cdef class Variant(object):
# GATK
nret = bcf_get_format_int32(self.vcf.hdr, self.b, "AD", &self._gt_alt_depths, &ndst)
if nret > 0:
- nper = nret / self.vcf.n_samples
+ nper = int(nret / self.vcf.n_samples)
if nper == 1:
stdlib.free(self._gt_alt_depths); self._gt_alt_depths = NULL
return (-1 + np.zeros(self.vcf.n_samples, np.int32))
@@ -1590,7 +1650,7 @@ cdef class Variant(object):
elif nret == -1:
# Freebayes
nret = bcf_get_format_int32(self.vcf.hdr, self.b, "AO", &self._gt_alt_depths, &ndst)
- nper = nret / self.vcf.n_samples
+ nper = int(nret / self.vcf.n_samples)
if nret < 0:
stdlib.free(self._gt_alt_depths); self._gt_alt_depths = NULL
return -1 + np.zeros(self.vcf.n_samples, np.int32)
@@ -1684,7 +1744,7 @@ cdef class Variant(object):
return depth
property gt_phases:
- """get a boolean indicating wether each sample is phased as a numpy array."""
+ """get a boolean indicating whether each sample is phased as a numpy array."""
def __get__(self):
# run for side-effect
if self._gt_phased == NULL:
@@ -2154,6 +2214,7 @@ cdef from_bytes(s):
return s
+# TODO: make Writer extend HTSFile not VCF by moving common methods into HTSFile
cdef class Writer(VCF):
"""
Writer class makes a VCF Writer.
@@ -2176,15 +2237,7 @@ cdef class Writer(VCF):
cdef const bcf_hdr_t *ohdr
def __init__(Writer self, fname, VCF tmpl, mode="w"):
- self.name = to_bytes(fname)
- if fname.endswith(".gz") and mode == "w":
- mode = "wz"
- if fname.endswith(".bcf") and mode == "w":
- mode = "wb"
- self.hts = hts_open(self.name, to_bytes(mode))
- if self.hts == NULL:
- raise Exception("error opening file: %s" % self.name)
-
+ self._open_htsfile(fname, mode)
bcf_hdr_sync(tmpl.hdr)
self.ohdr = tmpl.hdr
self.hdr = bcf_hdr_dup(tmpl.hdr)
@@ -2194,14 +2247,8 @@ cdef class Writer(VCF):
@classmethod
def from_string(Writer cls, fname, header_string, mode="w"):
cdef Writer self = Writer.__new__(Writer)
-
- self.name = to_bytes(fname)
- if fname.endswith(".gz") and mode == "w":
- mode = "wz"
- if fname.endswith(".bcf") and mode == "w":
- mode = "wb"
- self.hts = hts_open(self.name, to_bytes(mode))
cdef char *hmode = "w"
+ self._open_htsfile(fname, mode)
self.hdr = bcf_hdr_init(hmode)
self.ohdr = bcf_hdr_dup(self.hdr)
if bcf_hdr_parse(self.hdr, to_bytes(header_string)) != 0:
=====================================
cyvcf2/tests/test_reader.py
=====================================
@@ -1,5 +1,5 @@
from __future__ import print_function
-from cyvcf2 import VCF, Variant, Writer
+from ..cyvcf2 import VCF, Variant, Writer
import numpy as np
import os.path
from nose.tools import assert_raises
@@ -7,6 +7,11 @@ import tempfile
import sys
import os
import atexit
+try:
+ from pathlib import Path
+except ImportError:
+ from pathlib2 import Path # python 2 backport
+
HERE = os.path.dirname(__file__)
VCF_PATH = os.path.join(HERE, "test.vcf.gz")
@@ -20,8 +25,29 @@ except NameError:
basestring = (str, bytes)
def test_init():
+ # string
v = VCF(VCF_PATH)
assert v
+ expected_count = sum(1 for _ in v)
+ v.close()
+
+ # Path
+ v = VCF(Path(VCF_PATH))
+ value = sum(1 for _ in v)
+ assert value == expected_count
+
+ # file descriptor
+ with open(VCF_PATH) as fp:
+ fd = fp.fileno()
+ v = VCF(fd)
+ assert sum(1 for _ in v) == expected_count
+ v.close() # this should not close the file descriptor originally opened
+
+ # file-like object
+ with open(VCF_PATH) as fp:
+ v = VCF(fp)
+ assert sum(1 for _ in v) == expected_count
+ v.close() # this should not close the file descriptor originally opened
def test_type():
vcf = VCF(VCF_PATH)
@@ -237,32 +263,46 @@ def test_writer_from_string():
w.close()
-def test_writer():
-
- v = VCF(VCF_PATH)
- f = tempfile.mktemp(suffix=".vcf")
- atexit.register(os.unlink, f)
-
- o = Writer(f, v)
- rec = next(v)
+def run_writer(writer, filename, rec):
rec.INFO["AC"] = "3"
rec.FILTER = ["LowQual"]
- o.write_record(rec)
+ writer.write_record(rec)
rec.FILTER = ["LowQual", "VQSRTrancheSNP99.90to100.00"]
- o.write_record(rec)
-
+ writer.write_record(rec)
rec.FILTER = "PASS"
- o.write_record(rec)
+ writer.write_record(rec)
- o.close()
+ writer.close()
expected = ["LowQual", "LowQual;VQSRTrancheSNP99.90to100.00", None]
- for i, variant in enumerate(VCF(f)):
+ for i, variant in enumerate(VCF(filename)):
assert variant.FILTER == expected[i], (variant.FILTER, expected[i])
+def test_writer():
+ v = VCF(VCF_PATH)
+ f = tempfile.mktemp(suffix=".vcf")
+ atexit.register(os.unlink, f)
+ rec = next(v)
+
+ # string
+ run_writer(Writer(f, v), f, rec)
+
+ # Path
+ path = Path(f)
+ run_writer(Writer(path, v), f, rec)
+
+ # file descriptor
+ with open(VCF_PATH) as fp:
+ fd = fp.fileno()
+ run_writer(Writer(fd, v), f, rec)
+
+ # file-like object
+ with open(VCF_PATH) as fp:
+ run_writer(Writer(fp, v), f, rec)
+
def test_add_info_to_header():
v = VCF(VCF_PATH)
v.add_info_to_header({'ID': 'abcdefg', 'Description': 'abcdefg',
@@ -583,7 +623,7 @@ def test_set_format_float():
vcf = VCF('{}/test-format-string.vcf'.format(HERE))
assert vcf.add_format_to_header(dict(ID="PS", Number=1, Type="Float", Description="PS example")) == 0
v = next(vcf)
- v.set_format("PS", np.array([0.555, 1.111], dtype=np.float))
+ v.set_format("PS", np.array([0.555, 1.111], dtype=np.float32))
assert allclose(fmap(float, get_gt_str(v, "PS")), np.array([0.555, 1.111]))
v.set_format("PS", np.array([8.555, 11.111], dtype=np.float64))
@@ -876,18 +916,18 @@ def test_strict_gt_option_flag():
msg = "VCF(gts012=False, strict_gt=False) not working"
truth_gt_types = (0, 3, 1, 1, 1, 1, 0, 0, 2)
- assert tuple(variant.gt_bases.tolist()) == truth_gt_bases, '{} [gt_bases]'.format(msg)
- assert tuple(variant.gt_types.tolist()) == truth_gt_types, '{} [gt_types]'.format(msg)
- assert tuple(variant.genotypes) == truth_genotypes, '{} (genotypes)'.format(msg)
+ assert bool(tuple(variant.gt_bases.tolist()) == truth_gt_bases), '{} [gt_bases]'.format(msg)
+ assert bool(tuple(variant.gt_types.tolist()) == truth_gt_types), '{} [gt_types]'.format(msg)
+ assert bool(tuple(variant.genotypes) == truth_genotypes), '{} (genotypes)'.format(msg)
vcf = VCF(test_vcf, gts012=False, strict_gt=True)
variant = next(vcf)
msg = "VCF(gts012=False, strict_gt=True) not working"
truth_gt_types = (0, 3, 1, 1, 2, 2, 2, 2, 2)
- assert tuple(variant.gt_bases.tolist()) == truth_gt_bases, '{} [gt_bases]'.format(msg)
- assert tuple(variant.gt_types.tolist()) == truth_gt_types, '{} [gt_types]'.format(msg)
- assert tuple(variant.genotypes) == truth_genotypes, '{} (genotypes)'.format(msg)
+ assert bool(tuple(variant.gt_bases.tolist()) == truth_gt_bases), '{} [gt_bases]'.format(msg)
+ assert bool(tuple(variant.gt_types.tolist()) == truth_gt_types), '{} [gt_types]'.format(msg)
+ assert bool(tuple(variant.genotypes) == truth_genotypes), '{} (genotypes)'.format(msg)
vcf = VCF(test_vcf, gts012=True)
=====================================
docs/source/conf.py
=====================================
@@ -53,7 +53,7 @@ master_doc = 'index'
# General information about the project.
project = u'cyvcf2'
-copyright = u'2016, Brent Pedersen'
+copyright = u'2016..2020, Brent Pedersen'
author = u'Brent Pedersen'
# The version info for the project you're documenting, acts as replacement for
=====================================
docs/source/index.rst
=====================================
@@ -60,7 +60,7 @@ Modifying Existing Records
==========================
`cyvcf2` is optimized for fast reading and extraction from existing files.
-However, it also offers some means of modifying existing VCFs. Here, wrapper
+However, it also offers some means of modifying existing VCFs. Here, we
show an example of how to annotate variants with the genes that they overlap.
@@ -74,7 +74,8 @@ show an example of how to annotate variants with the genes that they overlap.
'Type':'Character', 'Number': '1'})
# create a new vcf Writer using the input vcf as a template.
- w = Writer(f, vcf)
+ fname = "out.vcf"
+ w = Writer(fname, vcf)
for v in vcf:
# The get_gene_intersections function is not shown.
=====================================
setup.py
=====================================
@@ -1,14 +1,20 @@
-from setuptools import setup, Extension
import os
import glob
import sys
import subprocess
+import platform
+
import pkg_resources
+from setuptools import setup, Extension, dist
if sys.version_info.major == 2 and sys.version_info.minor != 7:
sys.stderr.write("ERROR: cyvcf2 is only for python 2.7 or greater you are running %d.%d\n", (sys.version_info.major, sys.version_info.minor))
sys.exit(1)
+# Install numpy right now
+dist.Distribution().fetch_build_eggs(['numpy'])
+import numpy as np
+
def get_version():
"""Get the version info from the mpld3 package without importing it"""
@@ -26,51 +32,53 @@ def get_version():
raise ValueError("version could not be located")
-# Temporarily install dependencies required by setup.py before trying to import them.
-# From https://bitbucket.org/dholth/setup-requires
-
-sys.path[0:0] = ['setup-requires']
-pkg_resources.working_set.add_entry('setup-requires')
-
-
-def missing_requirements(specifiers):
- for specifier in specifiers:
- try:
- pkg_resources.require(specifier)
- except pkg_resources.DistributionNotFound:
- yield specifier
-
-
-def install_requirements(specifiers):
- to_install = list(specifiers)
- if to_install:
- cmd = [sys.executable, "-m", "pip", "install",
- "-t", "setup-requires"] + to_install
- subprocess.call(cmd)
-
-
-requires = ['cython', 'numpy', 'coloredlogs', 'click']
-install_requirements(missing_requirements(requires))
-
-
-excludes = ['irods', 'plugin']
-
-sources = [x for x in glob.glob('htslib/*.c') if not any(e in x for e in excludes)] + glob.glob('htslib/cram/*.c')
-# these have main()'s
+def no_cythonize(extensions, **_ignore):
+ for extension in extensions:
+ sources = []
+ for sfile in extension.sources:
+ path, ext = os.path.splitext(sfile)
+ if ext in (".pyx", ".py"):
+ sfile = path + ".c"
+ sources.append(sfile)
+ extension.sources[:] = sources
+ return extensions
+
+
+# Build the Cython extension by statically linking to the bundled htslib
+sources = [
+ x for x in glob.glob('htslib/*.c')
+ if not any(e in x for e in ['irods', 'plugin'])
+]
+sources += glob.glob('htslib/cram/*.c')
+# Exclude the htslib sources containing main()'s
sources = [x for x in sources if not x.endswith(('htsfile.c', 'tabix.c', 'bgzip.c'))]
sources.append('cyvcf2/helpers.c')
-import numpy as np
-import platform
-from Cython.Distutils import build_ext
-
-cmdclass = {'build_ext': build_ext}
-extension = [Extension("cyvcf2.cyvcf2",
+extensions = [Extension("cyvcf2.cyvcf2",
["cyvcf2/cyvcf2.pyx"] + sources,
libraries=['z', 'bz2', 'lzma', 'curl', 'ssl'] + (['crypt'] if platform.system() != 'Darwin' else []),
+ extra_compile_args=["-Wno-sign-compare", "-Wno-unused-function",
+ "-Wno-strict-prototypes",
+ "-Wno-unused-result", "-Wno-discarded-qualifiers"],
include_dirs=['htslib', 'cyvcf2', np.get_include()])]
+CYTHONIZE = bool(int(os.getenv("CYTHONIZE", 0)))
+if CYTHONIZE:
+ try:
+ from Cython.Build import cythonize
+ except ImportError:
+ sys.stderr.write(
+ "Cannot find Cython. Have you installed all the requirements?\n"
+ "Try pip install -r requirements.txt\n"
+ )
+ sys.exit(1)
+ compiler_directives = {"language_level": 2, "embedsignature": True}
+ extensions = cythonize(extensions, compiler_directives=compiler_directives)
+else:
+ extensions = no_cythonize(extensions)
+
+
setup(
name="cyvcf2",
description="fast vcf parsing with cython + htslib",
@@ -81,8 +89,7 @@ setup(
author="Brent Pedersen",
author_email="bpederse at gmail.com",
version=get_version(),
- cmdclass=cmdclass,
- ext_modules=extension,
+ ext_modules=extensions,
packages=['cyvcf2', 'cyvcf2.tests'],
entry_points=dict(
console_scripts=[
@@ -91,7 +98,7 @@ setup(
),
test_suite='nose.collector',
tests_require='nose',
- install_requires=['numpy'],
+ install_requires=['numpy', 'coloredlogs', 'click'],
include_package_data=True,
zip_safe=False,
)
View it on GitLab: https://salsa.debian.org/med-team/cyvcf2/-/commit/09dacf19d9e0e0bfd2c60d38d1141c31106cffff
--
View it on GitLab: https://salsa.debian.org/med-team/cyvcf2/-/commit/09dacf19d9e0e0bfd2c60d38d1141c31106cffff
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20200709/dd46f6bf/attachment-0001.html>
More information about the debian-med-commit
mailing list