[med-svn] [Git][med-team/cyvcf2][master] 6 commits: New upstream version 0.32.0
Étienne Mollier (@emollier)
gitlab at salsa.debian.org
Sun Feb 22 16:34:55 GMT 2026
Étienne Mollier pushed to branch master at Debian Med / cyvcf2
Commits:
9fd0cf69 by Étienne Mollier at 2026-02-22T17:24:16+01:00
New upstream version 0.32.0
- - - - -
48aebd1c by Étienne Mollier at 2026-02-22T17:24:17+01:00
Update upstream source from tag 'upstream/0.32.0'
Update to upstream version '0.32.0'
with Debian dir 28f1818cd268d54e4948303f910cd67ded00dc05
- - - - -
96a2905e by Étienne Mollier at 2026-02-22T17:25:12+01:00
i386.patch: refresh.
- - - - -
60ef0d40 by Étienne Mollier at 2026-02-22T17:25:30+01:00
d/control: drop redundant Priority: optional.
- - - - -
c375442e by Étienne Mollier at 2026-02-22T17:26:07+01:00
d/control: declare compliance to standards version 4.7.3.
- - - - -
9b5f9390 by Étienne Mollier at 2026-02-22T17:34:27+01:00
d/changelog: ready for upload to unstable.
- - - - -
11 changed files:
- .gitignore
- CHANGES.md
- README.md
- cyvcf2/__init__.py
- cyvcf2/cyvcf2.pxd
- cyvcf2/cyvcf2.pyx
- cyvcf2/tests/test_reader.py
- debian/changelog
- debian/control
- debian/patches/i386.patch
- docs/source/index.rst
Changes:
=====================================
.gitignore
=====================================
@@ -1,3 +1,5 @@
+out.vcf
+test.vcf
*.pyc
cyvcf2.egg-info/
build
@@ -20,3 +22,4 @@ setup-requires/*
.cache/v/cache/lastfailed
.idea
**/__pycache__
+.coverage
=====================================
CHANGES.md
=====================================
@@ -1,3 +1,10 @@
+# v0.32.0
++ make repeated `vcf()` calls restart from the beginning and isolate region
+ queries from the main iterator state. *NOTE*: this can be backwards-incompatible for those using `vcf()` to iterate without an index.
+
+# v0.31.5
++ allow removing a header line (thanks @fxwiegand #323)
+
# v0.31.4
+ add python 3.14 wheels (thanks @benjeffery #321)
=====================================
README.md
=====================================
@@ -65,6 +65,11 @@ vcf = VCF('some.vcf.gz')
for v in vcf('11:435345-556565'):
if v.INFO["AF"] > 0.1: continue
print(str(v))
+
+# to query "all records" via __call__:
+# this uses the index (HTS_IDX_START), so an index is required.
+# if no index is available, this yields zero records.
+all_vars = list(vcf())
```
Installation
=====================================
cyvcf2/__init__.py
=====================================
@@ -2,4 +2,4 @@ from .cyvcf2 import (VCF, Variant, Writer, r_ as r_unphased, par_relatedness,
par_het)
Reader = VCFReader = VCF
-__version__ = "0.31.4"
+__version__ = "0.32.0"
=====================================
cyvcf2/cyvcf2.pxd
=====================================
@@ -3,7 +3,7 @@ import numpy as np
cimport numpy as np
np.import_array()
-cdef extern from "string.h":
+cdef extern from "string.h":
void* memcpy(void* dest, const void* src, size_t n)
cdef extern from "relatedness.h":
@@ -38,6 +38,9 @@ cdef extern from "htslib/hts.h":
int hts_set_threads(htsFile *fp, int n);
+ cdef enum:
+ HTS_IDX_START
+
cdef union ufp:
hFILE *hfile;
@@ -49,7 +52,7 @@ cdef extern from "htslib/hts.h":
ctypedef struct htsFormat:
htsExactFormat format
-
+
ctypedef struct htsFile:
ufp fp
htsFormat format
@@ -101,6 +104,7 @@ cdef extern from "htslib/vcf.h":
pass
int bcf_itr_next(htsFile *, hts_itr_t* iter, bcf1_t*)
+ hts_itr_t *bcf_itr_queryi(hts_idx_t *, int tid, int64_t beg, int64_t end)
hts_itr_t *bcf_itr_querys(hts_idx_t *, void *, char *);
@@ -259,6 +263,7 @@ cdef extern from "htslib/vcf.h":
void bcf_hrec_destroy(bcf_hrec_t *)
bcf_hrec_t *bcf_hdr_id2hrec(const bcf_hdr_t *hdr, int type, int idx, int rid);
int bcf_hdr_add_hrec(bcf_hdr_t *hdr, bcf_hrec_t *hrec);
+ void bcf_hdr_remove(bcf_hdr_t *hdr, int type, const char *key);
int hts_close(htsFile *fp);
=====================================
cyvcf2/cyvcf2.pyx
=====================================
@@ -468,9 +468,65 @@ cdef class VCF(HTSFile):
Returns
-------
An Iterator over the requested region.
+
+ Notes
+ -----
+ If `region` is empty, cyvcf2 uses `HTS_IDX_START` to iterate from the
+ start of the index. This mode requires an index; without an index, the
+ iterator is empty.
"""
+ cdef hts_itr_t *itr
+ cdef kstring_t s
+ cdef bcf1_t *b
+ cdef int slen = 1, ret = 0
+ cdef bytes bregion
+ cdef char *cregion
+
if not region:
- yield from self
+ self._open_index()
+ if self.hidx == NULL and self.idx == NULL:
+ return
+
+ if self.hidx != NULL:
+ itr = bcf_itr_queryi(self.hidx, HTS_IDX_START, 0, 0)
+ if itr == NULL:
+ return
+ try:
+ while True:
+ b = bcf_init()
+ ret = bcf_itr_next(self.hts, itr, b)
+ if ret < 0:
+ bcf_destroy(b)
+ break
+ ret = bcf_subset_format(self.hdr, b)
+ assert ret == 0, ("could not subset variant", self.fname, region)
+ yield newVariant(b, self)
+ finally:
+ hts_itr_destroy(itr)
+ return
+
+ itr = tbx_itr_queryi(self.idx, HTS_IDX_START, 0, 0)
+ if itr == NULL:
+ # no iterator was created, so there is nothing to destroy
+ return
+ s.s, s.l, s.m = NULL, 0, 0
+ try:
+ while 1:
+ with nogil:
+ slen = tbx_itr_next(self.hts, self.idx, itr, &s)
+ if slen > 0:
+ b = bcf_init()
+ ret = vcf_parse(&s, self.hdr, b)
+ if slen <= 0:
+ break
+ if ret > 0:
+ bcf_destroy(b)
+ # s.s and itr are released by the finally block
+ raise Exception("error parsing")
+ yield newVariant(b, self)
+ finally:
+ stdlib.free(s.s)
+ hts_itr_destroy(itr)
return
if self.fname.decode(ENC).endswith(('.bcf', '.bcf.gz')):
@@ -481,12 +537,8 @@ cdef class VCF(HTSFile):
self.idx = tbx_index_load(to_bytes(self.fname))
assert self.idx != NULL, "error loading tabix index for %s" % self.fname
- cdef hts_itr_t *itr
- cdef kstring_t s
- cdef bcf1_t *b
- cdef int slen = 1, ret = 0
- cdef bytes bregion = to_bytes(region)
- cdef char *cregion = bregion
+ bregion = to_bytes(region)
+ cregion = bregion
with nogil:
itr = tbx_itr_querys(self.idx, cregion)
@@ -495,6 +547,7 @@ cdef class VCF(HTSFile):
warnings.warn("no intervals found for %s at %s" % (self.fname, region))
return
+ s.s, s.l, s.m = NULL, 0, 0
try:
while 1:
with nogil:
@@ -506,8 +559,7 @@ cdef class VCF(HTSFile):
break
if ret > 0:
bcf_destroy(b)
- stdlib.free(s.s)
- hts_itr_destroy(itr)
+ # s.s and itr are released by the finally block
raise Exception("error parsing")
yield newVariant(b, self)
finally:
@@ -584,6 +636,28 @@ cdef class VCF(HTSFile):
#bcf_hrec_destroy(b)
return d
+ cpdef remove_header(self, key, order=[BCF_HL_INFO, BCF_HL_FMT, BCF_HL_FLT]):
+ """Remove a header record from the VCF by ID.
+
+ Parameters
+ ----------
+ key: str
+ ID to pull from the header.
+ """
+ key = to_bytes(key)
+ cdef bcf_hrec_t *b
+
+ for typ in order:
+ b = bcf_hdr_get_hrec(self.hdr, typ, b"ID", key, NULL)
+ if b != NULL:
+ break
+
+ if b == NULL:
+ raise KeyError(key)
+
+ bcf_hdr_remove(<bcf_hdr_t*>self.hdr, typ, <const char*>key)
+ bcf_hdr_sync(<bcf_hdr_t*>self.hdr)
+
def __getitem__(self, key):
return self.get_header_type(key)
@@ -719,10 +793,10 @@ cdef class VCF(HTSFile):
property num_records:
"""
The number of VCF records in the file, computed from the index.
- If the file is not indexed (or an index has not been set using
+ If the file is not indexed (or an index has not been set using
``set_index``) a ValueError is raised.
-
- Note that incorrect values may be returned if a mismatched
+
+ Note that incorrect values may be returned if a mismatched
index file (i.e., the index for a different VCF file) is used.
This is not detected as an error condition.
"""
@@ -1815,7 +1889,7 @@ cdef class Variant(object):
alt_freq[aU] = 0
alt_freq[tU] = -1
- # compute the alt_freq when not unknown and no div0 error
+ # compute the alt_freq when not unknown and no div0 error
clean = ~tU & ~aU & ~t0
alt_freq[clean] = (a[clean] / t[clean])
@@ -2352,7 +2426,7 @@ cdef inline Variant newVariant(bcf1_t *b, VCF vcf):
v.vcf = vcf
v.POS = v.b.pos + 1
cdef INFO i = INFO.__new__(INFO)
- i.b = b
+ i.b = b
i.hdr = vcf.hdr
v.INFO = i
return v
@@ -2481,7 +2555,7 @@ cdef class Writer(VCF):
if var.b.errcode == BCF_ERR_CTG_UNDEF:
self.add_to_header("##contig=<ID=%s>" % var.CHROM)
var.b.errcode = 0
-
+
ks_free(&s)
return var
=====================================
cyvcf2/tests/test_reader.py
=====================================
@@ -592,6 +592,25 @@ def test_empty_call():
pass
assert i == 115, i
+def test_call_restarts_each_time():
+ vcf = VCF(VCF_PATH)
+ first = sum(1 for _ in vcf())
+ second = sum(1 for _ in vcf())
+ assert first == 115, first
+ assert second == 115, second
+
+def test_region_query_does_not_advance_empty_call():
+ vcf = VCF(VCF_PATH)
+ chr1_count = sum(1 for _ in vcf("1"))
+ full_count = sum(1 for _ in vcf())
+ assert chr1_count > 0, chr1_count
+ assert full_count == 115, full_count
+
+def test_empty_call_without_index_is_empty():
+ # test.snpeff.vcf is not indexed.
+ vcf = VCF(VCF_PATH2)
+ assert sum(1 for _ in vcf()) == 0
+
def test_haploid():
@@ -649,14 +668,26 @@ def test_header_stuff():
assert seen_infos == 73, seen_infos
+def test_bcf_hdr_remove():
+ v = VCF(VCF_PATH)
+ v.add_info_to_header({'ID': 'TESTINFO', 'Description': 'Test INFO field',
+ 'Type': 'String', 'Number': '1'})
+
+ assert v.get_header_type("TESTINFO")
+
+ v.remove_header('TESTINFO')
+ with pytest.raises(KeyError):
+ v.get_header_type('TESTINFO')
+
+
def test_bcf():
vcf = VCF('{}/test.snpeff.bcf'.format(HERE))
l = sum(1 for _ in vcf)
assert l == 10, l
- # NOTE: this is 0 because we don't SEEK.
+ # __call__ should return an iterator from the beginning each time.
l = sum(1 for _ in vcf())
- assert l == 0, l
+ assert l == 10, l
viter = vcf("1:69260-69438")
=====================================
debian/changelog
=====================================
@@ -1,3 +1,12 @@
+cyvcf2 (0.32.0-1) unstable; urgency=medium
+
+ * New upstream version 0.32.0
+ * i386.patch: refresh.
+ * d/control: drop redundant Priority: optional.
+ * d/control: declare compliance to standards version 4.7.3.
+
+ -- Étienne Mollier <emollier at debian.org> Sun, 22 Feb 2026 17:26:33 +0100
+
cyvcf2 (0.31.4-1) unstable; urgency=medium
* New upstream version 0.31.4 (Closes: #1119732)
=====================================
debian/control
=====================================
@@ -5,7 +5,6 @@ Uploaders: Steffen Moeller <moeller at debian.org>,
Étienne Mollier <emollier at debian.org>
Section: science
Testsuite: autopkgtest-pkg-python
-Priority: optional
Build-Depends: debhelper-compat (= 13),
dh-sequence-python3,
zlib1g-dev,
@@ -22,7 +21,7 @@ Build-Depends: debhelper-compat (= 13),
libhts-dev,
libbz2-dev,
help2man <!nodoc>
-Standards-Version: 4.7.2
+Standards-Version: 4.7.3
Vcs-Browser: https://salsa.debian.org/med-team/cyvcf2
Vcs-Git: https://salsa.debian.org/med-team/cyvcf2.git
Homepage: https://github.com/brentp/cyvcf2
=====================================
debian/patches/i386.patch
=====================================
@@ -3,9 +3,9 @@ Description: I386
exceeding ranges, hence skipping
Author: Nilesh Patra <npatra974 at gmail.com>
Last-Update: 2021-01-26
---- a/cyvcf2/tests/test_reader.py
-+++ b/cyvcf2/tests/test_reader.py
-@@ -16,6 +16,8 @@ import pytest
+--- cyvcf2.orig/cyvcf2/tests/test_reader.py
++++ cyvcf2/cyvcf2/tests/test_reader.py
+@@ -16,6 +16,8 @@
from ..cyvcf2 import VCF, Variant, Writer
@@ -14,7 +14,7 @@ Last-Update: 2021-01-26
HERE = os.path.dirname(__file__)
VCF_PATH = os.path.join(HERE, "test.vcf.gz")
-@@ -1254,6 +1256,7 @@ def test_no_seqlen():
+@@ -1285,6 +1287,7 @@
with pytest.raises(AttributeError):
vcf.seqlens
=====================================
docs/source/index.rst
=====================================
@@ -52,6 +52,11 @@ See the :ref:`api` for detailed documentation, but the most common usage is summ
if v.INFO["AF"] > 0.1: continue
print(str(v))
+ # to query "all records" via __call__:
+ # this uses the index (HTS_IDX_START), so an index is required.
+ # if no index is available, this yields zero records.
+ all_vars = list(vcf())
+
# single sample of 0|1 in vcf becomes [[0, 1, True]]
# 2 samples of 0/0 and 1|1 would be [[0, 0, False], [1, 1, True]]
print v.genotypes
@@ -153,4 +158,3 @@ Pysam also `has a cython wrapper to htslib <https://github.com/pysam-developers/
docstrings
writing
-
View it on GitLab: https://salsa.debian.org/med-team/cyvcf2/-/compare/b41fb5d2805042fc7165b075cd9f0842d6b183d2...9b5f9390893ec117f9047d694f49f3560b784631
--
View it on GitLab: https://salsa.debian.org/med-team/cyvcf2/-/compare/b41fb5d2805042fc7165b075cd9f0842d6b183d2...9b5f9390893ec117f9047d694f49f3560b784631
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20260222/f97d6a4c/attachment-0001.htm>
More information about the debian-med-commit mailing list