[med-svn] [Git][med-team/python-pybedtools][upstream] New upstream version 0.9.1

Tue Oct 31 15:41:12 GMT 2023


Lance Lin pushed to branch upstream at Debian Med / python-pybedtools


Commits:
fe2190f2 by Lance Lin at 2023-10-27T19:34:49+07:00
New upstream version 0.9.1
- - - - -


17 changed files:

- .github/workflows/main.yml
- README.rst
- docs/source/changes.rst
- docs/source/topical-genome.rst
- pybedtools/_Window.pyx
- pybedtools/bedtool.py
- pybedtools/cbedtools.pyx
- pybedtools/featurefuncs.pyx
- pybedtools/helpers.py
- pybedtools/scripts/venn_mpl.py
- + pybedtools/test/data/example.narrowPeak
- pybedtools/test/test_1.py
- pybedtools/test/test_helpers.py
- pybedtools/test/test_issues.py
- pybedtools/version.py
- + pyproject.toml
- setup.py


Changes:

=====================================
.github/workflows/main.yml
=====================================
@@ -1,11 +1,19 @@
 name: main
-on: [push]
+on:
+  push:
+    branches:
+      - master
+  pull_request:
+    types:
+      - opened
+      - reopened
+      - synchronize
 
 jobs:
   build-and-test:
     strategy:
       matrix:
-        python-version: [3.6, 3.7, 3.8, 3.9]
+        python-version: ["3.8", "3.9", "3.10", "3.11"]
     runs-on: ubuntu-latest
     steps:
     - uses: actions/checkout@v2
@@ -30,7 +38,7 @@ jobs:
       # This only requires Cython, no other dependencies.
       run: |
         eval "$(conda shell.bash hook)"
-        conda create -p ./cython-env -y cython
+        conda create -p ./cython-env -y cython python=${{ matrix.python-version }} numpy
         conda activate ./cython-env
         python setup.py clean cythonize sdist
         (cd dist && pip install pybedtools-*.tar.gz && cd $TMPDIR && python -c 'import pybedtools; print(pybedtools.__file__)')
@@ -53,31 +61,53 @@ jobs:
       # Tests below will operate in this newly-installed directory.
       run: |
         eval "$(conda shell.bash hook)"
-        conda create -y -p ./test-env \
-          --channel conda-forge \
-          --channel bioconda python=${{ matrix.python-version }} \
-          --file requirements.txt \
-          --file test-requirements.txt \
-          --file optional-requirements.txt
+        conda install mamba python=${{ matrix.python-version }} -y --channel conda-forge
+
+        if [ ${{ matrix.python-version }} != "3.11" ]; then
+          mamba create -y -p ./test-env \
+            --channel conda-forge \
+            --channel bioconda python=${{ matrix.python-version }} \
+            --file requirements.txt \
+            --file test-requirements.txt \
+            --file optional-requirements.txt
+          conda activate ./test-env
+        else
+          # Only install bedtools; let pip take care of the rest for 3.11 until
+          # bioconda catches up.
+          #
+          # We still install the test requirements though, and the optional
+          # requirements except for genomepy which is in bioconda.
+          grep -v "genomepy" optional-requirements.txt > optional-requirements-3.11.txt
+          mamba create -y -p ./test-env \
+            --channel conda-forge \
+            --channel bioconda \
+            bedtools \
+            python=${{ matrix.python-version }} \
+            --file test-requirements.txt \
+            --file optional-requirements-3.11.txt
+          conda activate ./test-env
+          pip install genomepy
+
+        fi
         conda activate ./test-env
 
         mkdir -p /tmp/pybedtools-uncompressed
         cd /tmp/pybedtools-uncompressed
         tar -xf $WORKDIR/dist/pybedtools-*.tar.gz
-        cd pybedtools-*
-        pip install -e .
-        python -c 'import pybedtools; print(pybedtools.__file__)'
-        ls *
+        pip install -e /tmp/pybedtools-uncompressed/pybedtools-*
 
+        # Trying import in the same directory will complain that cbedtools
+        # can't be imported
+        (cd / && python -c 'import pybedtools; print(pybedtools.__file__)')
 
     - name: tests
       # Run pytest and sphinx doctests
       run: |
-        eval "$(conda shell.bash hook)"
         cd $WORKDIR
+        eval "$(conda shell.bash hook)"
         conda activate ./test-env
 
-        # Move to extracted tarball dir, see above notes
+        # Extract the package tarball built above, and use that for running the tests.
         cd /tmp/pybedtools-uncompressed/pybedtools-*
         pytest -v --doctest-modules
         pytest -v pybedtools/test/genomepy_integration.py
@@ -89,29 +119,31 @@ jobs:
       # Build docs and commit to gh-pages branch. Note that no push happens
       # unless we're on the master branch
       run: |
-        eval "$(conda shell.bash hook)"
-        conda activate ./test-env
-
-        # Move to extracted tarball dir, see above notes
-        cd /tmp/pybedtools-uncompressed/pybedtools-*
-        (cd docs && make html)
-
-        git clone \
-          --single-branch \
-          --branch gh-pages "https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/$GITHUB_REPOSITORY" \
-          /tmp/docs
-
-        rm -rf /tmp/docs/*
-        cp -r docs/build/html/* /tmp/docs
-        touch /tmp/docs/.nojekyll
-        cd /tmp/docs
-        git add .
-        if git diff --cached --quiet; then
-          echo "no changes, nothing to commit"
-        else
-          git commit -m 'update docs'
+        if [ ${{ matrix.python-version }} != "3.11" ]; then
+          eval "$(conda shell.bash hook)"
+          conda activate ./test-env
+
+          # Move to extracted tarball dir, see above notes
+          cd /tmp/pybedtools-uncompressed/pybedtools-*
+          (cd docs && make html)
+
+          git clone \
+            --single-branch \
+            --branch gh-pages "https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/$GITHUB_REPOSITORY" \
+            /tmp/docs
+
+          rm -rf /tmp/docs/*
+          cp -r docs/build/html/* /tmp/docs
+          touch /tmp/docs/.nojekyll
+          cd /tmp/docs
+          git add .
+          if git diff --cached --quiet; then
+            echo "no changes, nothing to commit"
+          else
+            git commit -m 'update docs'
+          fi
+          cd $WORKDIR
         fi
-        cd $WORKDIR
 
 
     - name: docs artifact


=====================================
README.rst
=====================================
@@ -1,3 +1,4 @@
+
 Overview
 --------
 


=====================================
docs/source/changes.rst
=====================================
@@ -3,6 +3,26 @@
 Changelog
 =========
 
+Changes in v0.9.1
+-----------------
+
+2023-07-23
+
+* Dropping support for Python 3.6 and 3.7
+* Respect sorting of chromsize files (thanks @mgperry)
+* Updated setup.py to correctly reflect the MIT license change elsewhere (`#374
+  <https://github.com/daler/pybedtools/issues/374>`, thanks @hyandell)
+*  Support plotting lengths of intervals and custom DPI (`#367
+   <https://github.com/daler/pybedtools/issues/367>`, `#366
+   <https://github.com/daler/pybedtools/issues/366>`), thanks @yunfeiguo)
+* Remove outdated hard-coded check for 510 files in ``intersect`` and instead
+  defer to local machine's ``ulimit``
+* Enabling building/installing on Python 3.11 (thanks @daz10000)
+* Allow np.int64 start/stop positions to be used when creating Interval objects (`#390 <https://github.com/daler/pybedtools/issues/390>`)
+* properly close filehandles in .save_seq (thanks @PeterRobots)
+* include minimal pyproject.toml file (thanks @afg1)
+
+
 Changes in v0.9
 ---------------
 


=====================================
docs/source/topical-genome.rst
=====================================
@@ -98,20 +98,18 @@ will create a file from a dictionary or string:
     'dm3.genome'
     >>> print(open('dm3.genome').read())
     chr2L	23011544
-    chr2LHet	368872
     chr2R	21146708
-    chr2RHet	3288761
     chr3L	24543557
-    chr3LHet	2555491
     chr3R	27905053
-    chr3RHet	2517507
     chr4	1351857
+    chrX	22422827
+    chr2LHet	368872
+    chr2RHet	3288761
+    chr3LHet	2555491
+    chr3RHet	2517507
     chrM	19517
     chrU	10049037
     chrUextra	29004656
-    chrX	22422827
     chrXHet	204112
     chrYHet	347038
     <BLANKLINE>
-
-


=====================================
pybedtools/_Window.pyx
=====================================
@@ -1,4 +1,5 @@
 # cython: profile=True
+# cython: language_level=2
 
 import os
 from collections import deque


=====================================
pybedtools/bedtool.py
=====================================
@@ -338,20 +338,6 @@ def _wraps(
             if check_for_genome:
                 kwargs = self.check_genome(**kwargs)
 
-            # TODO: should this be implemented as a generic function that can
-            # be passed in for a each tool to check kwargs? Currently this is
-            # the only check I can think of.
-            if prog in ("intersect", "intersectBed"):
-                if (
-                    isinstance(kwargs["b"], list)
-                    and len(kwargs["b"]) > 510
-                    and all([isinstance(i, str) for i in kwargs["b"]])
-                ):
-                    raise pybedtoolsError(
-                        "BEDTools intersect does not support > 510 filenames for -b "
-                        "argument. Consider passing these as BedTool objects instead"
-                    )
-
             # For sequence methods, we may need to make a tempfile that will
             # hold the resulting sequence.  For example, fastaFromBed needs to
             # make a tempfile for 'fo' if no 'fo' was explicitly specified by
@@ -2130,7 +2116,7 @@ class BedTool(object):
         """
 
     @_log_to_history
-    @_wraps(prog="sortBed", implicit="i")
+    @_wraps(prog="sortBed", implicit="i", uses_genome=True, genome_if=["g", "genome"])
     def sort(self):
         """
         Wraps `bedtools sort`.
@@ -2320,8 +2306,8 @@ class BedTool(object):
         chr1	0	1
         chr1	500	900
         chr1	950	249250621
-        chr10	0	135534747
-        chr11	0	135006516
+        chr2	0	243199373
+        chr3	0	198022430
         """
 
     @_log_to_history
@@ -2726,9 +2712,11 @@ class BedTool(object):
 
         if not hasattr(self, "seqfn"):
             raise ValueError("Use .sequence(fasta) to get the sequence first")
-        fout = open(fn, "w")
-        fout.write(open(self.seqfn).read())
-        fout.close()
+
+        with open(fn, "w") as fout:
+            with open(self.seqfn) as seqfile:
+                fout.write(seqfile.read())
+
         new_bedtool = BedTool(self.fn)
         new_bedtool.seqfn = fn
         return new_bedtool


=====================================
pybedtools/cbedtools.pyx
=====================================
@@ -1,4 +1,5 @@
 # distutils: language = c++
+# cython: language_level=2
 
 # String notes:
 #
@@ -15,6 +16,7 @@
 
 from cpython.version cimport PY_MAJOR_VERSION
 from libcpp.string cimport string
+import numpy as np
 
 # Python byte strings automatically coerce to/from C++ strings.
 
@@ -23,7 +25,7 @@ cdef _cppstr(s):
     #
     # C++ uses bytestrings. PY2 strings need no conversion; bare PY3 strings
     # are unicode and so must be encoded to bytestring.
-    if isinstance(s, int):
+    if isinstance(s, integer_types):
         s = str(s)
     if isinstance(s, unicode):
         s = s.encode('UTF-8')
@@ -36,9 +38,9 @@ cdef _pystr(string s):
     return s.decode('UTF-8', 'strict')
 
 if PY_MAJOR_VERSION < 3:
-    integer_types = (int, long)
+    integer_types = (int, long, np.int64)
 else:
-    integer_types = (int,)
+    integer_types = (int, np.int64)
 
 """
     bedtools.pyx: A Cython wrapper for the BEDTools BedFile class


=====================================
pybedtools/featurefuncs.pyx
=====================================
@@ -1,3 +1,4 @@
+# cython: language_level=2
 # distutils: language = c++
 from cbedtools cimport Interval
 from cbedtools import create_interval_from_list


=====================================
pybedtools/helpers.py
=====================================
@@ -815,7 +815,7 @@ def chromsizes_to_file(chrom_sizes, fn=None):
     if isinstance(chrom_sizes, str):
         chrom_sizes = chromsizes(chrom_sizes)
     fout = open(fn, "wt")
-    for chrom, bounds in sorted(chrom_sizes.items()):
+    for chrom, bounds in chrom_sizes.items():
         line = chrom + "\t" + str(bounds[1]) + "\n"
         fout.write(line)
     fout.close()


=====================================
pybedtools/scripts/venn_mpl.py
=====================================
@@ -17,8 +17,7 @@ import sys
 import os
 import pybedtools
 
-
-def venn_mpl(a, b, c, colors=None, outfn="out.png", labels=None):
+def venn_mpl(a, b, c, colors=None, outfn="out.png", labels=None, by_length=False, dpi=300):
     """
     *a*, *b*, and *c* are filenames to BED-like files.
 
@@ -30,6 +29,11 @@ def venn_mpl(a, b, c, colors=None, outfn="out.png", labels=None):
 
     *labels* is a list of labels to use for each of the files; by default the
     labels are ['a','b','c']
+
+    *by_length* if True, then instead of plotting number of intervals, plot combined
+    lengths of intervals
+
+    *dpi* is the dpi setting passed to matplotlib savefig
     """
     try:
         import matplotlib.pyplot as plt
@@ -44,6 +48,9 @@ def venn_mpl(a, b, c, colors=None, outfn="out.png", labels=None):
     a = pybedtools.BedTool(a)
     b = pybedtools.BedTool(b)
     c = pybedtools.BedTool(c)
+    count_features = lambda x:x.count()
+    if by_length:
+        count_features = lambda x:x.total_coverage()
 
     if colors is None:
         colors = ["r", "b", "g"]
@@ -89,35 +96,35 @@ def venn_mpl(a, b, c, colors=None, outfn="out.png", labels=None):
     kwargs = dict(horizontalalignment="center")
 
     # Unique to A
-    ax.text(center - 2 * offset, center + offset, str((a - b - c).count()), **kwargs)
+    ax.text(center - 2 * offset, center + offset, str(count_features(a - b - c)), **kwargs)
 
     # Unique to B
-    ax.text(center + 2 * offset, center + offset, str((b - a - c).count()), **kwargs)
+    ax.text(center + 2 * offset, center + offset, str(count_features(b - a - c)), **kwargs)
 
     # Unique to C
-    ax.text(center, center - 2 * offset, str((c - a - b).count()), **kwargs)
+    ax.text(center, center - 2 * offset, str(count_features(c - a - b)), **kwargs)
 
     # A and B not C
     ax.text(
-        center, center + 2 * offset - 0.5 * offset, str((a + b - c).count()), **kwargs
+        center, center + 2 * offset - 0.5 * offset, str(count_features(a + b - c)), **kwargs
     )
 
     # A and C not B
     ax.text(
-        center - 1.2 * offset, center - 0.5 * offset, str((a + c - b).count()), **kwargs
+        center - 1.2 * offset, center - 0.5 * offset, str(count_features(a + c - b)), **kwargs
     )
 
     # B and C not A
     ax.text(
-        center + 1.2 * offset, center - 0.5 * offset, str((b + c - a).count()), **kwargs
+        center + 1.2 * offset, center - 0.5 * offset, str(count_features(b + c - a)), **kwargs
     )
 
     # all
-    ax.text(center, center, str((a + b + c).count()), **kwargs)
+    ax.text(center, center, str(count_features(a + b + c)), **kwargs)
 
     ax.legend(loc="best")
 
-    fig.savefig(outfn)
+    fig.savefig(outfn, dpi=dpi)
 
     plt.close(fig)
 


=====================================
pybedtools/test/data/example.narrowPeak
=====================================
@@ -0,0 +1,5 @@
+track type=narrowPeak visibility=3 db=hg19 name="nPk" description="ENCODE narrowPeak Example"
+browser position chr1:9356000-9365000
+chr1	9356548	9356648	.	0	.	182	5.0945	-1	50
+chr1	9358722	9358822	.	0	.	91	4.6052	-1	40
+chr1	9361082	9361182	.	0	.	182	9.2103	-1	75


=====================================
pybedtools/test/test_1.py
=====================================
@@ -1542,7 +1542,8 @@ def test_window_maker():
     x = pybedtools.BedTool()
     z = x.window_maker(genome="hg19", w=100000)
     assert str(z[0]) == "chr1\t0\t100000\n"
-    assert str(z[10000]) == "chr16\t20800000\t20900000\n"
+    assert str(z[10000]) == "chr5\t118200000\t118300000\n"
+
 
 
 def test_random():
@@ -1551,16 +1552,16 @@ def test_random():
     print(result)
     assert result == fix(
     """
-    chr12	95739557	95739567	1	10	-
-    chr2	113014250	113014260	2	10	-
-    chr19	28057962	28057972	3	10	+
-    chr4	76502010	76502020	4	10	+
-    chr3	151395380	151395390	5	10	-
-    chr2	43767824	43767834	6	10	+
-    chr10	117350440	117350450	7	10	+
-    chr6	65439870	65439880	8	10	+
-    chr16	19569197	19569207	9	10	-
-    chr7	104134021	104134031	10	10	-
+    chr3	123121550	123121560	1	10	-
+    chr8	10634720	10634730	2	10	-
+    chr7	24782325	24782335	3	10	+
+    chr12	19713230	19713240	4	10	+
+    chr11	31590686	31590696	5	10	-
+    chr7	100526957	100526967	6	10	+
+    chr2	117350440	117350450	7	10	+
+    chr15	25320997	25321007	8	10	+
+    chr5	116677189	116677199	9	10	-
+    chr17	75329339	75329349	10	10	-
     """
     )
 


=====================================
pybedtools/test/test_helpers.py
=====================================
@@ -133,14 +133,14 @@ def test_chromsizes():
         assert hg17["chr1"] == (0, 245522847)
 
         fn = pybedtools.chromsizes_to_file(hg17, fn="hg17.genome")
-        expected = "chr1\t245522847\n"
+        expected = "chr10\t135413628\n"
         results = open(fn).readline()
         print(results)
         assert expected == results
 
         # make sure the tempfile version works, too
         fn = pybedtools.chromsizes_to_file(hg17, fn=None)
-        expected = "chr1\t245522847\n"
+        expected = "chr10\t135413628\n"
         results = open(fn).readline()
         print(results)
         assert expected == results


=====================================
pybedtools/test/test_issues.py
=====================================
@@ -733,38 +733,55 @@ def test_issue_258():
 
 def test_issue_303():
     # Issue 303 describes hitting a cap of 253 -b files. Locally I hit a limit
-    # at 510, and observe the same on travis-ci.
+    # at 510 on Linux and observe the same on travis-ci. On macOS it's 256.
     #
     # The fix was to check the args in bedtool._wraps, and raise an exception
-    # if there's more than 510 filenames provided. Note that it works find with
-    # many BedTool objects.
+    # if there's more than supported filenames provided. Note that it works
+    # fine with many BedTool objects.
+
+    ulimit = subprocess.run(
+        ['/bin/bash', '-c', "ulimit -n"], capture_output=True, universal_newlines=True
+    )
+    ulimit = int(ulimit.stdout)
+    print(ulimit)
 
     b = []
-    for i in range(1000):
+    current_prefix = pybedtools.settings.tempfile_prefix
+    pybedtools.settings.tempfile_prefix = "/tmp/p"
+    for i in range(ulimit):
         b.append(
             pybedtools.BedTool(
                 "chr1\t{0}\t{1}\tb{0}".format(i, i + 1), from_string=True
             )
         )
+    pybedtools.settings.tempfile_prefix = current_prefix
     a = pybedtools.example_bedtool("a.bed")
 
     # Use many BedTool objects; this works
     x = a.intersect(b, wao=True, filenames=True)
 
-    # Try different cutoffs, providing filenames rather than BedTool objects:
+    # Try different cutoffs, providing filenames rather than BedTool objects.
+    # Note that on some systems this will hit `ARG_MAX` of the system before it
+    # hits the ulimit.
+    #
+    # Rather than find (and push) the limits of whatever system this test is
+    # running on, for now use 510 as a reasonable test for "many".
     for n in [64, 256, 510]:
+        if n >= ulimit:
+            print('ulimit of', ulimit, 'reached; stopping')
+            break
         b2 = [i.fn for i in b[:n]]
         try:
             y = a.intersect(b2)
 
-        # If running on a system that supports <510 filenames, we'll get
+        # If running on a system that supports <n filenames, we'll get
         # a BEDToolsError, so catch that and report here
-        except pybedtools.helpers.BEDToolsError:
+        except (pybedtools.helpers.BEDToolsError, OSError):
             raise ValueError("Hit a limit at {0} files".format(n))
 
     # Otherwise, too many filenames should raise a pybedtoolsError as detected
     # by the _wraps() function.
-    with pytest.raises(pybedtools.helpers.pybedtoolsError):
+    with pytest.raises(pybedtools.helpers.BEDToolsError):
         y = a.intersect([i.fn for i in b])
 
 
@@ -857,3 +874,48 @@ def test_issue_355():
             break
     assert line.split('\t')[1] == '14'
     assert vcf[0].start == 13
+
+    
+def test_genome_dict_sort():
+    genome = {
+        "chr1": (0, 5000),
+        "chr9": (0, 5000),
+        "chr12": (0, 5000),
+    }
+
+    # example taken from BedTool.sort() doctest
+    bed = pybedtools.BedTool(
+        """
+        chr9 300 400
+        chr1 100 200
+        chr1 1 50
+        chr12 1 100
+        chr9 500 600
+        """,
+        from_string=True,
+    )
+
+    result = bed.sort(genome=genome)
+
+    assert result == fix(
+        """
+        chr1	1	50
+        chr1	100	200
+        chr9	300	400
+        chr9	500	600
+        chr12	1	100
+        """
+    ), result
+
+
+def test_issue_365():
+    # confirming that narrowPeak works; #365 may be due to spaces rather than
+    # tabs in user's original file or maybe copying from UCSC
+    a = pybedtools.example_bedtool('example.narrowPeak')
+    a[0]
+
+def test_issue_390():
+    # Previously raised AttributeError: 'numpy.int64' object has no attribute 'isdigit'
+    # Fix was to include np.int64 as an integer type in cbedtools.pyx.
+    import numpy as np
+    pybedtools.BedTool([['chr1', np.int64(1), np.int64(2)]])


=====================================
pybedtools/version.py
=====================================
@@ -1,4 +1,4 @@
 
 # THIS FILE IS GENERATED FROM SETUP.PY
-version = '0.8.2'
+version = '0.9.1'
 __version__ = version
\ No newline at end of file


=====================================
pyproject.toml
=====================================
@@ -0,0 +1,3 @@
+[build-system]
+requires = ["setuptools>=61.0", "wheel", "Cython>=0.29.30,<3.0"]
+build-backend = "setuptools.build_meta:__legacy__"
\ No newline at end of file


=====================================
setup.py
=====================================
@@ -77,7 +77,7 @@ import distutils.log
 
 MAJ = 0
 MIN = 9
-REV = 0
+REV = 1
 VERSION = '%d.%d.%d' % (MAJ, MIN, REV)
 
 
@@ -290,17 +290,17 @@ if __name__ == "__main__":
         ext_modules=extensions,
         maintainer_email='ryan.dale@nih.gov',
         description='Wrapper around BEDTools for bioinformatics work',
-        license='GPLv2',
+        license='MIT',
         url='https://github.com/daler/pybedtools',
         download_url='',
         long_description=README,
         zip_safe=False,
         setup_requires=[],
-        install_requires=['six', 'pysam'],
+        install_requires=['six', 'pysam', 'numpy'],
         classifiers=[
             'Development Status :: 5 - Production/Stable',
             'Intended Audience :: Science/Research',
-            'License :: OSI Approved :: GNU General Public License (GPL)',
+            'License :: OSI Approved :: MIT License',
             'Topic :: Scientific/Engineering :: Bio-Informatics',
             'Programming Language :: Python',
             'Programming Language :: Python :: 3',
@@ -325,4 +325,5 @@ if __name__ == "__main__":
                       'src': ['src/*'],
                       },
         include_package_data=True,
+        language_level=2,
     )



View it on GitLab: https://salsa.debian.org/med-team/python-pybedtools/-/commit/fe2190f2237eb4b7808659449ccc2520d2fc5fd2

-- 
View it on GitLab: https://salsa.debian.org/med-team/python-pybedtools/-/commit/fe2190f2237eb4b7808659449ccc2520d2fc5fd2
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20231031/da2b8614/attachment-0001.htm>