[med-svn] [Git][med-team/cyvcf2][master] 6 commits: New upstream version 0.32.0

Étienne Mollier (@emollier) gitlab at salsa.debian.org
Sun Feb 22 16:34:55 GMT 2026



Étienne Mollier pushed to branch master at Debian Med / cyvcf2


Commits:
9fd0cf69 by Étienne Mollier at 2026-02-22T17:24:16+01:00
New upstream version 0.32.0
- - - - -
48aebd1c by Étienne Mollier at 2026-02-22T17:24:17+01:00
Update upstream source from tag 'upstream/0.32.0'

Update to upstream version '0.32.0'
with Debian dir 28f1818cd268d54e4948303f910cd67ded00dc05
- - - - -
96a2905e by Étienne Mollier at 2026-02-22T17:25:12+01:00
i386.patch: refresh.

- - - - -
60ef0d40 by Étienne Mollier at 2026-02-22T17:25:30+01:00
d/control: drop redundant Priority: optional.

- - - - -
c375442e by Étienne Mollier at 2026-02-22T17:26:07+01:00
d/control: declare compliance to standards version 4.7.3.

- - - - -
9b5f9390 by Étienne Mollier at 2026-02-22T17:34:27+01:00
d/changelog: ready for upload to unstable.

- - - - -


11 changed files:

- .gitignore
- CHANGES.md
- README.md
- cyvcf2/__init__.py
- cyvcf2/cyvcf2.pxd
- cyvcf2/cyvcf2.pyx
- cyvcf2/tests/test_reader.py
- debian/changelog
- debian/control
- debian/patches/i386.patch
- docs/source/index.rst


Changes:

=====================================
.gitignore
=====================================
@@ -1,3 +1,5 @@
+out.vcf
+test.vcf
 *.pyc
 cyvcf2.egg-info/
 build
@@ -20,3 +22,4 @@ setup-requires/*
 .cache/v/cache/lastfailed
 .idea
 **/__pycache__
+.coverage


=====================================
CHANGES.md
=====================================
@@ -1,3 +1,10 @@
+# v0.32.0
++ make repeated `vcf()` calls restart from the beginning and isolate region
+  queries from the main iterator state. *NOTE* this can be backwards incompatible for those using vcf() to iterate without an index.
+
+# v0.31.5
++ allow removing a header line (thanks @fxwiegand #323)
+
 # v0.31.4
 + add python 3.14 wheels (thanks @benjeffery #321)
 


=====================================
README.md
=====================================
@@ -65,6 +65,11 @@ vcf = VCF('some.vcf.gz')
 for v in vcf('11:435345-556565'):
     if v.INFO["AF"] > 0.1: continue
     print(str(v))
+
+# to query "all records" via __call__:
+# this uses the index (HTS_IDX_START), so an index is required.
+# if no index is available, this yields zero records.
+all_vars = list(vcf())
 ```
 
 Installation


=====================================
cyvcf2/__init__.py
=====================================
@@ -2,4 +2,4 @@ from .cyvcf2 import (VCF, Variant, Writer, r_ as r_unphased, par_relatedness,
                      par_het)
 Reader = VCFReader = VCF
 
-__version__ = "0.31.4"
+__version__ = "0.32.0"


=====================================
cyvcf2/cyvcf2.pxd
=====================================
@@ -3,7 +3,7 @@ import numpy as np
 cimport numpy as np
 np.import_array()
 
-cdef extern from "string.h":  
+cdef extern from "string.h":
     void* memcpy(void* dest, const void* src, size_t n)
 
 cdef extern from "relatedness.h":
@@ -38,6 +38,9 @@ cdef extern from "htslib/hts.h":
 
     int hts_set_threads(htsFile *fp, int n);
 
+    cdef enum:
+        HTS_IDX_START
+
 
     cdef union ufp:
         hFILE *hfile;
@@ -49,7 +52,7 @@ cdef extern from "htslib/hts.h":
 
     ctypedef struct htsFormat:
         htsExactFormat format
-    
+
     ctypedef struct htsFile:
         ufp fp
         htsFormat format
@@ -101,6 +104,7 @@ cdef extern from "htslib/vcf.h":
         pass
 
     int bcf_itr_next(htsFile *, hts_itr_t* iter, bcf1_t*)
+    hts_itr_t *bcf_itr_queryi(hts_idx_t *, int tid, int64_t beg, int64_t end)
     hts_itr_t *bcf_itr_querys(hts_idx_t *, void *, char *);
 
 
@@ -259,6 +263,7 @@ cdef extern from "htslib/vcf.h":
     void bcf_hrec_destroy(bcf_hrec_t *)
     bcf_hrec_t *bcf_hdr_id2hrec(const bcf_hdr_t *hdr, int type, int idx, int rid);
     int bcf_hdr_add_hrec(bcf_hdr_t *hdr, bcf_hrec_t *hrec);
+    void bcf_hdr_remove(bcf_hdr_t *hdr, int type, const char *key);
 
     int hts_close(htsFile *fp);
 


=====================================
cyvcf2/cyvcf2.pyx
=====================================
@@ -468,9 +468,65 @@ cdef class VCF(HTSFile):
         Returns
         -------
         An Iterator over the requested region.
+
+        Notes
+        -----
+        If `region` is empty, cyvcf2 uses `HTS_IDX_START` to iterate from the
+        start of the index. This mode requires an index; without an index, the
+        iterator is empty.
         """
+        cdef hts_itr_t *itr
+        cdef kstring_t s
+        cdef bcf1_t *b
+        cdef int slen = 1, ret = 0
+        cdef bytes bregion
+        cdef char *cregion
+
         if not region:
-            yield from self
+            self._open_index()
+            if self.hidx == NULL and self.idx == NULL:
+                return
+
+            if self.hidx != NULL:
+                itr = bcf_itr_queryi(self.hidx, HTS_IDX_START, 0, 0)
+                if itr == NULL:
+                    return
+                try:
+                    while True:
+                        b = bcf_init()
+                        ret = bcf_itr_next(self.hts, itr, b)
+                        if ret < 0:
+                            bcf_destroy(b)
+                            break
+                        ret = bcf_subset_format(self.hdr, b)
+                        assert ret == 0, ("could not subset variant", self.fname, region)
+                        yield newVariant(b, self)
+                finally:
+                    hts_itr_destroy(itr)
+                return
+
+            itr = tbx_itr_queryi(self.idx, HTS_IDX_START, 0, 0)
+            if itr == NULL:
+                # no iterator was created, so there is nothing to destroy
+                return
+            s.s, s.l, s.m = NULL, 0, 0
+            try:
+                while 1:
+                    with nogil:
+                        slen = tbx_itr_next(self.hts, self.idx, itr, &s)
+                        if slen > 0:
+                            b = bcf_init()
+                            ret = vcf_parse(&s, self.hdr, b)
+                    if slen <= 0:
+                        break
+                    if ret > 0:
+                        bcf_destroy(b)
+                        # s.s and itr are released by the finally block
+                        raise Exception("error parsing")
+                    yield newVariant(b, self)
+            finally:
+                stdlib.free(s.s)
+                hts_itr_destroy(itr)
             return
 
         if self.fname.decode(ENC).endswith(('.bcf', '.bcf.gz')):
@@ -481,12 +537,8 @@ cdef class VCF(HTSFile):
             self.idx = tbx_index_load(to_bytes(self.fname))
             assert self.idx != NULL, "error loading tabix index for %s" % self.fname
 
-        cdef hts_itr_t *itr
-        cdef kstring_t s
-        cdef bcf1_t *b
-        cdef int slen = 1, ret = 0
-        cdef bytes bregion = to_bytes(region)
-        cdef char *cregion = bregion
+        bregion = to_bytes(region)
+        cregion = bregion
 
         with nogil:
             itr = tbx_itr_querys(self.idx, cregion)
@@ -495,6 +547,7 @@ cdef class VCF(HTSFile):
             warnings.warn("no intervals found for %s at %s" % (self.fname, region))
             return
 
+        s.s, s.l, s.m = NULL, 0, 0
         try:
             while 1:
                 with nogil:
@@ -506,8 +559,7 @@ cdef class VCF(HTSFile):
                     break
                 if ret > 0:
                     bcf_destroy(b)
-                    stdlib.free(s.s)
-                    hts_itr_destroy(itr)
+                    # s.s and itr are released by the finally block
                     raise Exception("error parsing")
                 yield newVariant(b, self)
         finally:
@@ -584,6 +636,28 @@ cdef class VCF(HTSFile):
         #bcf_hrec_destroy(b)
         return d
 
+    cpdef remove_header(self, key, order=[BCF_HL_INFO, BCF_HL_FMT, BCF_HL_FLT]):
+        """Remove a header record from the VCF by ID.
+
+        Parameters
+        ----------
+        key: str
+            ID to pull from the header.
+        """
+        key = to_bytes(key)
+        cdef bcf_hrec_t *b
+
+        for typ in order:
+            b = bcf_hdr_get_hrec(self.hdr, typ, b"ID", key, NULL)
+            if b != NULL:
+                break
+
+        if b == NULL:
+            raise KeyError(key)
+
+        bcf_hdr_remove(<bcf_hdr_t*>self.hdr, typ, <const char*>key)
+        bcf_hdr_sync(<bcf_hdr_t*>self.hdr)
+
     def __getitem__(self, key):
         return self.get_header_type(key)
 
@@ -719,10 +793,10 @@ cdef class VCF(HTSFile):
     property num_records:
         """
         The number of VCF records in the file, computed from the index.
-        If the file is not indexed (or an index has not been set using 
+        If the file is not indexed (or an index has not been set using
         ``set_index``) a ValueError is raised.
-        
-        Note that incorrect values may be returned if a mismatched 
+
+        Note that incorrect values may be returned if a mismatched
         index file (i.e., the index for a different VCF file) is used.
         This is not detected as an error condition.
         """
@@ -1815,7 +1889,7 @@ cdef class Variant(object):
             alt_freq[aU] = 0
             alt_freq[tU] = -1
 
-            # compute the alt_freq when not unknown and no div0 error 
+            # compute the alt_freq when not unknown and no div0 error
             clean = ~tU & ~aU & ~t0
             alt_freq[clean] = (a[clean] / t[clean])
 
@@ -2352,7 +2426,7 @@ cdef inline Variant newVariant(bcf1_t *b, VCF vcf):
     v.vcf = vcf
     v.POS = v.b.pos + 1
     cdef INFO i = INFO.__new__(INFO)
-    i.b = b 
+    i.b = b
     i.hdr = vcf.hdr
     v.INFO = i
     return v
@@ -2481,7 +2555,7 @@ cdef class Writer(VCF):
         if var.b.errcode == BCF_ERR_CTG_UNDEF:
             self.add_to_header("##contig=<ID=%s>" % var.CHROM)
             var.b.errcode = 0
-        
+
         ks_free(&s)
         return var
 


=====================================
cyvcf2/tests/test_reader.py
=====================================
@@ -592,6 +592,25 @@ def test_empty_call():
         pass
     assert i == 115, i
 
+def test_call_restarts_each_time():
+    vcf = VCF(VCF_PATH)
+    first = sum(1 for _ in vcf())
+    second = sum(1 for _ in vcf())
+    assert first == 115, first
+    assert second == 115, second
+
+def test_region_query_does_not_advance_empty_call():
+    vcf = VCF(VCF_PATH)
+    chr1_count = sum(1 for _ in vcf("1"))
+    full_count = sum(1 for _ in vcf())
+    assert chr1_count > 0, chr1_count
+    assert full_count == 115, full_count
+
+def test_empty_call_without_index_is_empty():
+    # test.snpeff.vcf is not indexed.
+    vcf = VCF(VCF_PATH2)
+    assert sum(1 for _ in vcf()) == 0
+
 
 
 def test_haploid():
@@ -649,14 +668,26 @@ def test_header_stuff():
     assert seen_infos == 73, seen_infos
 
 
+def test_bcf_hdr_remove():
+    v = VCF(VCF_PATH)
+    v.add_info_to_header({'ID': 'TESTINFO', 'Description': 'Test INFO field',
+        'Type': 'String', 'Number': '1'})
+
+    assert v.get_header_type("TESTINFO")
+
+    v.remove_header('TESTINFO')
+    with pytest.raises(KeyError):
+        v.get_header_type('TESTINFO')
+
+
 def test_bcf():
     vcf = VCF('{}/test.snpeff.bcf'.format(HERE))
     l = sum(1 for _ in vcf)
     assert l == 10, l
 
-    # NOTE: this is 0 becuase we don't SEEK.
+    # __call__ should return an iterator from the beginning each time.
     l = sum(1 for _ in vcf())
-    assert l == 0, l
+    assert l == 10, l
 
 
     viter = vcf("1:69260-69438")


=====================================
debian/changelog
=====================================
@@ -1,3 +1,12 @@
+cyvcf2 (0.32.0-1) unstable; urgency=medium
+
+  * New upstream version 0.32.0
+  * i386.patch: refresh.
+  * d/control: drop redundant Priority: optional.
+  * d/control: declare compliance to standards version 4.7.3.
+
+ -- Étienne Mollier <emollier at debian.org>  Sun, 22 Feb 2026 17:26:33 +0100
+
 cyvcf2 (0.31.4-1) unstable; urgency=medium
 
   * New upstream version 0.31.4  (Closes: #1119732)


=====================================
debian/control
=====================================
@@ -5,7 +5,6 @@ Uploaders: Steffen Moeller <moeller at debian.org>,
            Étienne Mollier <emollier at debian.org>
 Section: science
 Testsuite: autopkgtest-pkg-python
-Priority: optional
 Build-Depends: debhelper-compat (= 13),
                dh-sequence-python3,
                zlib1g-dev,
@@ -22,7 +21,7 @@ Build-Depends: debhelper-compat (= 13),
                libhts-dev,
                libbz2-dev,
                help2man <!nodoc>
-Standards-Version: 4.7.2
+Standards-Version: 4.7.3
 Vcs-Browser: https://salsa.debian.org/med-team/cyvcf2
 Vcs-Git: https://salsa.debian.org/med-team/cyvcf2.git
 Homepage: https://github.com/brentp/cyvcf2


=====================================
debian/patches/i386.patch
=====================================
@@ -3,9 +3,9 @@ Description: I386
  exceeding ranges, hence skipping
 Author: Nilesh Patra <npatra974 at gmail.com>
 Last-Update: 2021-01-26
---- a/cyvcf2/tests/test_reader.py
-+++ b/cyvcf2/tests/test_reader.py
-@@ -16,6 +16,8 @@ import pytest
+--- cyvcf2.orig/cyvcf2/tests/test_reader.py
++++ cyvcf2/cyvcf2/tests/test_reader.py
+@@ -16,6 +16,8 @@
  
  from ..cyvcf2 import VCF, Variant, Writer
  
@@ -14,7 +14,7 @@ Last-Update: 2021-01-26
  
  HERE = os.path.dirname(__file__)
  VCF_PATH = os.path.join(HERE, "test.vcf.gz")
-@@ -1254,6 +1256,7 @@ def test_no_seqlen():
+@@ -1285,6 +1287,7 @@
      with pytest.raises(AttributeError):
          vcf.seqlens
  


=====================================
docs/source/index.rst
=====================================
@@ -52,6 +52,11 @@ See the :ref:`api` for detailed documentation, but the most common usage is summ
         if v.INFO["AF"] > 0.1: continue
         print(str(v))
 
+    # to query "all records" via __call__:
+    # this uses the index (HTS_IDX_START), so an index is required.
+    # if no index is available, this yields zero records.
+    all_vars = list(vcf())
+
         # single sample of 0|1 in vcf becomes [[0, 1, True]]
         # 2 samples of 0/0 and 1|1 would be [[0, 0, False], [1, 1, True]]
         print v.genotypes 
@@ -153,4 +158,3 @@ Pysam also `has a cython wrapper to htslib <https://github.com/pysam-developers/
 
    docstrings
    writing
-



View it on GitLab: https://salsa.debian.org/med-team/cyvcf2/-/compare/b41fb5d2805042fc7165b075cd9f0842d6b183d2...9b5f9390893ec117f9047d694f49f3560b784631

-- 
View it on GitLab: https://salsa.debian.org/med-team/cyvcf2/-/compare/b41fb5d2805042fc7165b075cd9f0842d6b183d2...9b5f9390893ec117f9047d694f49f3560b784631
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20260222/f97d6a4c/attachment-0001.htm>


More information about the debian-med-commit mailing list