[med-svn] [Git][med-team/python-pybedtools][upstream] New upstream version 0.9.1
Lance Lin (@linqigang)
gitlab at salsa.debian.org
Tue Oct 31 15:41:12 GMT 2023
Lance Lin pushed to branch upstream at Debian Med / python-pybedtools
Commits:
fe2190f2 by Lance Lin at 2023-10-27T19:34:49+07:00
New upstream version 0.9.1
- - - - -
17 changed files:
- .github/workflows/main.yml
- README.rst
- docs/source/changes.rst
- docs/source/topical-genome.rst
- pybedtools/_Window.pyx
- pybedtools/bedtool.py
- pybedtools/cbedtools.pyx
- pybedtools/featurefuncs.pyx
- pybedtools/helpers.py
- pybedtools/scripts/venn_mpl.py
- + pybedtools/test/data/example.narrowPeak
- pybedtools/test/test_1.py
- pybedtools/test/test_helpers.py
- pybedtools/test/test_issues.py
- pybedtools/version.py
- + pyproject.toml
- setup.py
Changes:
=====================================
.github/workflows/main.yml
=====================================
@@ -1,11 +1,19 @@
name: main
-on: [push]
+on:
+ push:
+ branches:
+ - master
+ pull_request:
+ types:
+ - opened
+ - reopened
+ - synchronize
jobs:
build-and-test:
strategy:
matrix:
- python-version: [3.6, 3.7, 3.8, 3.9]
+ python-version: ["3.8", "3.9", "3.10", "3.11"]
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
@@ -30,7 +38,7 @@ jobs:
# This only requires Cython, no other dependencies.
run: |
eval "$(conda shell.bash hook)"
- conda create -p ./cython-env -y cython
+ conda create -p ./cython-env -y cython python=${{ matrix.python-version }} numpy
conda activate ./cython-env
python setup.py clean cythonize sdist
(cd dist && pip install pybedtools-*.tar.gz && cd $TMPDIR && python -c 'import pybedtools; print(pybedtools.__file__)')
@@ -53,31 +61,53 @@ jobs:
# Tests below will operate in this newly-installed directory.
run: |
eval "$(conda shell.bash hook)"
- conda create -y -p ./test-env \
- --channel conda-forge \
- --channel bioconda python=${{ matrix.python-version }} \
- --file requirements.txt \
- --file test-requirements.txt \
- --file optional-requirements.txt
+ conda install mamba python=${{ matrix.python-version }} -y --channel conda-forge
+
+ if [ ${{ matrix.python-version }} != "3.11" ]; then
+ mamba create -y -p ./test-env \
+ --channel conda-forge \
+ --channel bioconda python=${{ matrix.python-version }} \
+ --file requirements.txt \
+ --file test-requirements.txt \
+ --file optional-requirements.txt
+ conda activate ./test-env
+ else
+ # Only install bedtools; let pip take care of the rest for 3.11 until
+ # bioconda catches up.
+ #
+ # We still install the test requirements though, and the optional
+ # requirements except for genomepy which is in bioconda.
+ grep -v "genomepy" optional-requirements.txt > optional-requirements-3.11.txt
+ mamba create -y -p ./test-env \
+ --channel conda-forge \
+ --channel bioconda \
+ bedtools \
+ python=${{ matrix.python-version }} \
+ --file test-requirements.txt \
+ --file optional-requirements-3.11.txt
+ conda activate ./test-env
+ pip install genomepy
+
+ fi
conda activate ./test-env
mkdir -p /tmp/pybedtools-uncompressed
cd /tmp/pybedtools-uncompressed
tar -xf $WORKDIR/dist/pybedtools-*.tar.gz
- cd pybedtools-*
- pip install -e .
- python -c 'import pybedtools; print(pybedtools.__file__)'
- ls *
+ pip install -e /tmp/pybedtools-uncompressed/pybedtools-*
+ # Trying import in the same directory will complain that cbedtools
+ # can't be imported
+ (cd / && python -c 'import pybedtools; print(pybedtools.__file__)')
- name: tests
# Run pytest and sphinx doctests
run: |
- eval "$(conda shell.bash hook)"
cd $WORKDIR
+ eval "$(conda shell.bash hook)"
conda activate ./test-env
- # Move to extracted tarball dir, see above notes
+ # Extract the package tarball built above, and use that for running the tests.
cd /tmp/pybedtools-uncompressed/pybedtools-*
pytest -v --doctest-modules
pytest -v pybedtools/test/genomepy_integration.py
@@ -89,29 +119,31 @@ jobs:
# Build docs and commit to gh-pages branch. Note that no push happens
# unless we're on the master branch
run: |
- eval "$(conda shell.bash hook)"
- conda activate ./test-env
-
- # Move to extracted tarball dir, see above notes
- cd /tmp/pybedtools-uncompressed/pybedtools-*
- (cd docs && make html)
-
- git clone \
- --single-branch \
- --branch gh-pages "https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/$GITHUB_REPOSITORY" \
- /tmp/docs
-
- rm -rf /tmp/docs/*
- cp -r docs/build/html/* /tmp/docs
- touch /tmp/docs/.nojekyll
- cd /tmp/docs
- git add .
- if git diff --cached --quiet; then
- echo "no changes, nothing to commit"
- else
- git commit -m 'update docs'
+ if [ ${{ matrix.python-version }} != "3.11" ]; then
+ eval "$(conda shell.bash hook)"
+ conda activate ./test-env
+
+ # Move to extracted tarball dir, see above notes
+ cd /tmp/pybedtools-uncompressed/pybedtools-*
+ (cd docs && make html)
+
+ git clone \
+ --single-branch \
+ --branch gh-pages "https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/$GITHUB_REPOSITORY" \
+ /tmp/docs
+
+ rm -rf /tmp/docs/*
+ cp -r docs/build/html/* /tmp/docs
+ touch /tmp/docs/.nojekyll
+ cd /tmp/docs
+ git add .
+ if git diff --cached --quiet; then
+ echo "no changes, nothing to commit"
+ else
+ git commit -m 'update docs'
+ fi
+ cd $WORKDIR
fi
- cd $WORKDIR
- name: docs artifact
=====================================
README.rst
=====================================
@@ -1,3 +1,4 @@
+
Overview
--------
=====================================
docs/source/changes.rst
=====================================
@@ -3,6 +3,26 @@
Changelog
=========
+Changes in v0.9.1
+-----------------
+
+2023-07-23
+
+* Dropping support for Python 3.6 and 3.7
+* Respect sorting of chromsize files (thanks @mgperry)
+* Updated setup.py to correctly reflect the MIT license change elsewhere (`#374
+ <https://github.com/daler/pybedtools/issues/374>`, thanks @hyandell)
+* Support plotting lengths of intervals and custom DPI (`#367
+ <https://github.com/daler/pybedtools/issues/367>`, `#366
+ <https://github.com/daler/pybedtools/issues/366>`, thanks @yunfeiguo)
+* Remove outdated hard-coded check for 510 files in ``intersect`` and instead
+ defer to local machine's ``ulimit``
+* Enabling building/installing on Python 3.11 (thanks @daz10000)
+* Allow np.int64 start/stop positions to be used when creating Interval objects (`#390 <https://github.com/daler/pybedtools/issues/390>`)
+* Properly close filehandles in .save_seq (thanks @PeterRobots)
+* Include minimal pyproject.toml file (thanks @afg1)
+
+
Changes in v0.9
---------------
=====================================
docs/source/topical-genome.rst
=====================================
@@ -98,20 +98,18 @@ will create a file from a dictionary or string:
'dm3.genome'
>>> print(open('dm3.genome').read())
chr2L 23011544
- chr2LHet 368872
chr2R 21146708
- chr2RHet 3288761
chr3L 24543557
- chr3LHet 2555491
chr3R 27905053
- chr3RHet 2517507
chr4 1351857
+ chrX 22422827
+ chr2LHet 368872
+ chr2RHet 3288761
+ chr3LHet 2555491
+ chr3RHet 2517507
chrM 19517
chrU 10049037
chrUextra 29004656
- chrX 22422827
chrXHet 204112
chrYHet 347038
<BLANKLINE>
-
-
=====================================
pybedtools/_Window.pyx
=====================================
@@ -1,4 +1,5 @@
# cython: profile=True
+# cython: language_level=2
import os
from collections import deque
=====================================
pybedtools/bedtool.py
=====================================
@@ -338,20 +338,6 @@ def _wraps(
if check_for_genome:
kwargs = self.check_genome(**kwargs)
- # TODO: should this be implemented as a generic function that can
- # be passed in for a each tool to check kwargs? Currently this is
- # the only check I can think of.
- if prog in ("intersect", "intersectBed"):
- if (
- isinstance(kwargs["b"], list)
- and len(kwargs["b"]) > 510
- and all([isinstance(i, str) for i in kwargs["b"]])
- ):
- raise pybedtoolsError(
- "BEDTools intersect does not support > 510 filenames for -b "
- "argument. Consider passing these as BedTool objects instead"
- )
-
# For sequence methods, we may need to make a tempfile that will
# hold the resulting sequence. For example, fastaFromBed needs to
# make a tempfile for 'fo' if no 'fo' was explicitly specified by
@@ -2130,7 +2116,7 @@ class BedTool(object):
"""
@_log_to_history
- @_wraps(prog="sortBed", implicit="i")
+ @_wraps(prog="sortBed", implicit="i", uses_genome=True, genome_if=["g", "genome"])
def sort(self):
"""
Wraps `bedtools sort`.
@@ -2320,8 +2306,8 @@ class BedTool(object):
chr1 0 1
chr1 500 900
chr1 950 249250621
- chr10 0 135534747
- chr11 0 135006516
+ chr2 0 243199373
+ chr3 0 198022430
"""
@_log_to_history
@@ -2726,9 +2712,11 @@ class BedTool(object):
if not hasattr(self, "seqfn"):
raise ValueError("Use .sequence(fasta) to get the sequence first")
- fout = open(fn, "w")
- fout.write(open(self.seqfn).read())
- fout.close()
+
+ with open(fn, "w") as fout:
+ with open(self.seqfn) as seqfile:
+ fout.write(seqfile.read())
+
new_bedtool = BedTool(self.fn)
new_bedtool.seqfn = fn
return new_bedtool
=====================================
pybedtools/cbedtools.pyx
=====================================
@@ -1,4 +1,5 @@
# distutils: language = c++
+# cython: language_level=2
# String notes:
#
@@ -15,6 +16,7 @@
from cpython.version cimport PY_MAJOR_VERSION
from libcpp.string cimport string
+import numpy as np
# Python byte strings automatically coerce to/from C++ strings.
@@ -23,7 +25,7 @@ cdef _cppstr(s):
#
# C++ uses bytestrings. PY2 strings need no conversion; bare PY3 strings
# are unicode and so must be encoded to bytestring.
- if isinstance(s, int):
+ if isinstance(s, integer_types):
s = str(s)
if isinstance(s, unicode):
s = s.encode('UTF-8')
@@ -36,9 +38,9 @@ cdef _pystr(string s):
return s.decode('UTF-8', 'strict')
if PY_MAJOR_VERSION < 3:
- integer_types = (int, long)
+ integer_types = (int, long, np.int64)
else:
- integer_types = (int,)
+ integer_types = (int, np.int64)
"""
bedtools.pyx: A Cython wrapper for the BEDTools BedFile class
=====================================
pybedtools/featurefuncs.pyx
=====================================
@@ -1,3 +1,4 @@
+# cython: language_level=2
# distutils: language = c++
from cbedtools cimport Interval
from cbedtools import create_interval_from_list
=====================================
pybedtools/helpers.py
=====================================
@@ -815,7 +815,7 @@ def chromsizes_to_file(chrom_sizes, fn=None):
if isinstance(chrom_sizes, str):
chrom_sizes = chromsizes(chrom_sizes)
fout = open(fn, "wt")
- for chrom, bounds in sorted(chrom_sizes.items()):
+ for chrom, bounds in chrom_sizes.items():
line = chrom + "\t" + str(bounds[1]) + "\n"
fout.write(line)
fout.close()
=====================================
pybedtools/scripts/venn_mpl.py
=====================================
@@ -17,8 +17,7 @@ import sys
import os
import pybedtools
-
-def venn_mpl(a, b, c, colors=None, outfn="out.png", labels=None):
+def venn_mpl(a, b, c, colors=None, outfn="out.png", labels=None, by_length=False, dpi=300):
"""
*a*, *b*, and *c* are filenames to BED-like files.
@@ -30,6 +29,11 @@ def venn_mpl(a, b, c, colors=None, outfn="out.png", labels=None):
*labels* is a list of labels to use for each of the files; by default the
labels are ['a','b','c']
+
+ *by_length* if True, then instead of plotting number of intervals, plot combined
+ lengths of intervals
+
+ *dpi* is the dpi setting passed to matplotlib savefig
"""
try:
import matplotlib.pyplot as plt
@@ -44,6 +48,9 @@ def venn_mpl(a, b, c, colors=None, outfn="out.png", labels=None):
a = pybedtools.BedTool(a)
b = pybedtools.BedTool(b)
c = pybedtools.BedTool(c)
+ count_features = lambda x:x.count()
+ if by_length:
+ count_features = lambda x:x.total_coverage()
if colors is None:
colors = ["r", "b", "g"]
@@ -89,35 +96,35 @@ def venn_mpl(a, b, c, colors=None, outfn="out.png", labels=None):
kwargs = dict(horizontalalignment="center")
# Unique to A
- ax.text(center - 2 * offset, center + offset, str((a - b - c).count()), **kwargs)
+ ax.text(center - 2 * offset, center + offset, str(count_features(a - b - c)), **kwargs)
# Unique to B
- ax.text(center + 2 * offset, center + offset, str((b - a - c).count()), **kwargs)
+ ax.text(center + 2 * offset, center + offset, str(count_features(b - a - c)), **kwargs)
# Unique to C
- ax.text(center, center - 2 * offset, str((c - a - b).count()), **kwargs)
+ ax.text(center, center - 2 * offset, str(count_features(c - a - b)), **kwargs)
# A and B not C
ax.text(
- center, center + 2 * offset - 0.5 * offset, str((a + b - c).count()), **kwargs
+ center, center + 2 * offset - 0.5 * offset, str(count_features(a + b - c)), **kwargs
)
# A and C not B
ax.text(
- center - 1.2 * offset, center - 0.5 * offset, str((a + c - b).count()), **kwargs
+ center - 1.2 * offset, center - 0.5 * offset, str(count_features(a + c - b)), **kwargs
)
# B and C not A
ax.text(
- center + 1.2 * offset, center - 0.5 * offset, str((b + c - a).count()), **kwargs
+ center + 1.2 * offset, center - 0.5 * offset, str(count_features(b + c - a)), **kwargs
)
# all
- ax.text(center, center, str((a + b + c).count()), **kwargs)
+ ax.text(center, center, str(count_features(a + b + c)), **kwargs)
ax.legend(loc="best")
- fig.savefig(outfn)
+ fig.savefig(outfn, dpi=dpi)
plt.close(fig)
=====================================
pybedtools/test/data/example.narrowPeak
=====================================
@@ -0,0 +1,5 @@
+track type=narrowPeak visibility=3 db=hg19 name="nPk" description="ENCODE narrowPeak Example"
+browser position chr1:9356000-9365000
+chr1 9356548 9356648 . 0 . 182 5.0945 -1 50
+chr1 9358722 9358822 . 0 . 91 4.6052 -1 40
+chr1 9361082 9361182 . 0 . 182 9.2103 -1 75
=====================================
pybedtools/test/test_1.py
=====================================
@@ -1542,7 +1542,8 @@ def test_window_maker():
x = pybedtools.BedTool()
z = x.window_maker(genome="hg19", w=100000)
assert str(z[0]) == "chr1\t0\t100000\n"
- assert str(z[10000]) == "chr16\t20800000\t20900000\n"
+ assert str(z[10000]) == "chr5\t118200000\t118300000\n"
+
def test_random():
@@ -1551,16 +1552,16 @@ def test_random():
print(result)
assert result == fix(
"""
- chr12 95739557 95739567 1 10 -
- chr2 113014250 113014260 2 10 -
- chr19 28057962 28057972 3 10 +
- chr4 76502010 76502020 4 10 +
- chr3 151395380 151395390 5 10 -
- chr2 43767824 43767834 6 10 +
- chr10 117350440 117350450 7 10 +
- chr6 65439870 65439880 8 10 +
- chr16 19569197 19569207 9 10 -
- chr7 104134021 104134031 10 10 -
+ chr3 123121550 123121560 1 10 -
+ chr8 10634720 10634730 2 10 -
+ chr7 24782325 24782335 3 10 +
+ chr12 19713230 19713240 4 10 +
+ chr11 31590686 31590696 5 10 -
+ chr7 100526957 100526967 6 10 +
+ chr2 117350440 117350450 7 10 +
+ chr15 25320997 25321007 8 10 +
+ chr5 116677189 116677199 9 10 -
+ chr17 75329339 75329349 10 10 -
"""
)
=====================================
pybedtools/test/test_helpers.py
=====================================
@@ -133,14 +133,14 @@ def test_chromsizes():
assert hg17["chr1"] == (0, 245522847)
fn = pybedtools.chromsizes_to_file(hg17, fn="hg17.genome")
- expected = "chr1\t245522847\n"
+ expected = "chr10\t135413628\n"
results = open(fn).readline()
print(results)
assert expected == results
# make sure the tempfile version works, too
fn = pybedtools.chromsizes_to_file(hg17, fn=None)
- expected = "chr1\t245522847\n"
+ expected = "chr10\t135413628\n"
results = open(fn).readline()
print(results)
assert expected == results
=====================================
pybedtools/test/test_issues.py
=====================================
@@ -733,38 +733,55 @@ def test_issue_258():
def test_issue_303():
# Issue 303 describes hitting a cap of 253 -b files. Locally I hit a limit
- # at 510, and observe the same on travis-ci.
+ # at 510 on Linux and observe the same on travis-ci. On macOS it's 256.
#
# The fix was to check the args in bedtool._wraps, and raise an exception
- # if there's more than 510 filenames provided. Note that it works find with
- # many BedTool objects.
+ # if more filenames are provided than the system supports. Note that it
+ # works fine with many BedTool objects.
+
+ ulimit = subprocess.run(
+ ['/bin/bash', '-c', "ulimit -n"], capture_output=True, universal_newlines=True
+ )
+ ulimit = int(ulimit.stdout)
+ print(ulimit)
b = []
- for i in range(1000):
+ current_prefix = pybedtools.settings.tempfile_prefix
+ pybedtools.settings.tempfile_prefix = "/tmp/p"
+ for i in range(ulimit):
b.append(
pybedtools.BedTool(
"chr1\t{0}\t{1}\tb{0}".format(i, i + 1), from_string=True
)
)
+ pybedtools.settings.tempfile_prefix = current_prefix
a = pybedtools.example_bedtool("a.bed")
# Use many BedTool objects; this works
x = a.intersect(b, wao=True, filenames=True)
- # Try different cutoffs, providing filenames rather than BedTool objects:
+ # Try different cutoffs, providing filenames rather than BedTool objects.
+ # Note that on some systems this will hit `ARG_MAX` of the system before it
+ # hits the ulimit.
+ #
+ # Rather than find (and push) the limits of whatever system this test is
+ # running on, for now use 510 as a reasonable test for "many".
for n in [64, 256, 510]:
+ if n >= ulimit:
+ print('ulimit of', ulimit, 'reached; stopping')
+ break
b2 = [i.fn for i in b[:n]]
try:
y = a.intersect(b2)
- # If running on a system that supports <510 filenames, we'll get
+ # If running on a system that supports <n filenames, we'll get
# a BEDToolsError, so catch that and report here
- except pybedtools.helpers.BEDToolsError:
+ except (pybedtools.helpers.BEDToolsError, OSError):
raise ValueError("Hit a limit at {0} files".format(n))
# Otherwise, too many filenames should raise a BEDToolsError, now that the
# hard-coded filename-count check in _wraps() has been removed.
- with pytest.raises(pybedtools.helpers.pybedtoolsError):
+ with pytest.raises(pybedtools.helpers.BEDToolsError):
y = a.intersect([i.fn for i in b])
@@ -857,3 +874,48 @@ def test_issue_355():
break
assert line.split('\t')[1] == '14'
assert vcf[0].start == 13
+
+
+def test_genome_dict_sort():
+ genome = {
+ "chr1": (0, 5000),
+ "chr9": (0, 5000),
+ "chr12": (0, 5000),
+ }
+
+ # example taken from BedTool.sort() doctest
+ bed = pybedtools.BedTool(
+ """
+ chr9 300 400
+ chr1 100 200
+ chr1 1 50
+ chr12 1 100
+ chr9 500 600
+ """,
+ from_string=True,
+ )
+
+ result = bed.sort(genome=genome)
+
+ assert result == fix(
+ """
+ chr1 1 50
+ chr1 100 200
+ chr9 300 400
+ chr9 500 600
+ chr12 1 100
+ """
+ ), result
+
+
+def test_issue_365():
+ # confirming that narrowPeak works; #365 may be due to spaces rather than
+ # tabs in user's original file or maybe copying from UCSC
+ a = pybedtools.example_bedtool('example.narrowPeak')
+ a[0]
+
+def test_issue_390():
+ # Previously raised AttributeError: 'numpy.int64' object has no attribute 'isdigit'
+ # Fix was to include np.int64 as an integer type in cbedtools.pyx.
+ import numpy as np
+ pybedtools.BedTool([['chr1', np.int64(1), np.int64(2)]])
=====================================
pybedtools/version.py
=====================================
@@ -1,4 +1,4 @@
# THIS FILE IS GENERATED FROM SETUP.PY
-version = '0.8.2'
+version = '0.9.1'
__version__ = version
\ No newline at end of file
=====================================
pyproject.toml
=====================================
@@ -0,0 +1,3 @@
+[build-system]
+requires = ["setuptools>=61.0", "wheel", "Cython>=0.29.30,<3.0"]
+build-backend = "setuptools.build_meta:__legacy__"
\ No newline at end of file
=====================================
setup.py
=====================================
@@ -77,7 +77,7 @@ import distutils.log
MAJ = 0
MIN = 9
-REV = 0
+REV = 1
VERSION = '%d.%d.%d' % (MAJ, MIN, REV)
@@ -290,17 +290,17 @@ if __name__ == "__main__":
ext_modules=extensions,
maintainer_email='ryan.dale at nih.gov',
description='Wrapper around BEDTools for bioinformatics work',
- license='GPLv2',
+ license='MIT',
url='https://github.com/daler/pybedtools',
download_url='',
long_description=README,
zip_safe=False,
setup_requires=[],
- install_requires=['six', 'pysam'],
+ install_requires=['six', 'pysam', 'numpy'],
classifiers=[
'Development Status :: 5 - Production/Stable',
'Intended Audience :: Science/Research',
- 'License :: OSI Approved :: GNU General Public License (GPL)',
+ 'License :: OSI Approved :: MIT License',
'Topic :: Scientific/Engineering :: Bio-Informatics',
'Programming Language :: Python',
'Programming Language :: Python :: 3',
@@ -325,4 +325,5 @@ if __name__ == "__main__":
'src': ['src/*'],
},
include_package_data=True,
+ language_level=2,
)
View it on GitLab: https://salsa.debian.org/med-team/python-pybedtools/-/commit/fe2190f2237eb4b7808659449ccc2520d2fc5fd2
--
View it on GitLab: https://salsa.debian.org/med-team/python-pybedtools/-/commit/fe2190f2237eb4b7808659449ccc2520d2fc5fd2
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20231031/da2b8614/attachment-0001.htm>
More information about the debian-med-commit
mailing list