[med-svn] [python-pybedtools] 01/08: New upstream version 0.7.10
Andreas Tille
tille at debian.org
Wed Jul 19 19:35:40 UTC 2017
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository python-pybedtools.
commit bb15845b13063c3c735b617fa7ad34a281772eba
Author: Andreas Tille <tille at debian.org>
Date: Wed Jul 19 21:12:29 2017 +0200
New upstream version 0.7.10
---
.travis.yml | 7 +-
condatest.sh | 66 ++++++-----
docs/source/_static/custom.css | 3 +
.../autodocs/pybedtools.contrib.plotting.Track.rst | 10 +-
docs/source/changes.rst | 28 +++++
docs/source/conf.py | 2 +-
docs/source/topical-saving.rst | 43 +++++---
optional-requirements.txt | 6 +
pybedtools/bedtool.py | 108 ++++++++++++++++--
pybedtools/contrib/bigwig.py | 52 ++++++++-
pybedtools/contrib/long_range_interaction.py | 6 +-
pybedtools/settings.py | 1 +
pybedtools/test/data/gdc.othersort.bam | Bin 0 -> 275 bytes
pybedtools/test/mpl-expected.png | Bin 31626 -> 24450 bytes
pybedtools/test/test1.py | 121 +++++++++++++++------
pybedtools/version.py | 10 +-
requirements.txt | 6 +-
setup.py | 37 ++++---
requirements.txt => test-requirements.txt | 3 +-
19 files changed, 387 insertions(+), 122 deletions(-)
diff --git a/.travis.yml b/.travis.yml
index 34f7784..6f2e7f3 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -7,8 +7,9 @@ sudo: false
python:
- "2.7"
- - "3.3"
+ #- "3.3"
- "3.4"
+ - "3.5"
notifications:
email:
@@ -44,7 +45,3 @@ install:
script:
- ./condatest.sh "$TRAVIS_PYTHON_VERSION"
-
-branches:
- only:
- - master
diff --git a/condatest.sh b/condatest.sh
index ceba1ba..2f64621 100755
--- a/condatest.sh
+++ b/condatest.sh
@@ -1,4 +1,4 @@
-#1/bin/bash
+#!/bin/bash
# Installs pybedtools and requirements into a fresh Python 2 or 3 environment
# and runs tests.
@@ -13,40 +13,56 @@ PY_VERSION=$1
usage="Usage: $0 py_version[2|3]"
: ${PY_VERSION:?$usage}
-# Ensure we're starting the environment from scratch
+log () {
+ echo
+ echo "[`date`] TEST HARNESS: $1"
+ echo
+}
+
+log "removing existing env pbtpy${PY_VERSION}"
name=pbtpy${PY_VERSION}
-conda env remove -y -n $name
+conda env list | grep -q $name && conda env remove -y -n $name
-# Force the re-Cythonizing
-rm -rf dist build
+log "starting with basic environment"
+conda create -y -n $name --channel bioconda python=${PY_VERSION} \
+ bedtools \
+ "htslib<1.4" \
+ ucsc-bedgraphtobigwig \
+ ucsc-bigwigtobedgraph
+source activate $name
+log "temporarily install cython"
+conda install cython
+
+log "force re-cythonizing"
+rm -rf dist build
python setup.py clean
python setup.py build
python setup.py sdist
-conda create \
- -y \
- -c daler \
- -n $name \
- python=${PY_VERSION} \
- bedtools=2.25.0 \
- matplotlib \
- sphinx \
- numpydoc \
- tabix \
- pysam \
- nose \
- six \
- pyyaml \
- pandas
+log "uninstall cython"
+conda remove cython
-source activate $name
+log "test installation of sdist"
+set -x
+(cd dist && pip install pybedtools-*.tar.gz && python -c 'import pybedtools')
+set +x
-# test installation via pip; just test that we can import successfully:
-pip install dist/pybedtools-*.tar.gz
-(cd docs && python -c 'import pybedtools')
+python setup.py clean
+
+log "install test requirements"
+source deactivate
+conda env list | grep -q $name && conda env remove -y -n $name
+conda create -y -n $name --channel bioconda python=${PY_VERSION} \
+ --file "requirements.txt" \
+ --file "test-requirements.txt" \
+ --file "optional-requirements.txt"
-# Now actually build from source dir
+source activate $name
+
+log "install pybedtools from setup.py in develop mode to trigger re-cythonizing"
python setup.py develop
+
+log "run tests"
nosetests
(cd docs && make clean && make doctest)
diff --git a/docs/source/_static/custom.css b/docs/source/_static/custom.css
new file mode 100644
index 0000000..36b2760
--- /dev/null
+++ b/docs/source/_static/custom.css
@@ -0,0 +1,3 @@
+div.highlight-python pre {
+ font-size: 0.7em;
+}
diff --git a/docs/source/autodocs/pybedtools.contrib.plotting.Track.rst b/docs/source/autodocs/pybedtools.contrib.plotting.Track.rst
index c5498de..a965f2d 100644
--- a/docs/source/autodocs/pybedtools.contrib.plotting.Track.rst
+++ b/docs/source/autodocs/pybedtools.contrib.plotting.Track.rst
@@ -25,6 +25,7 @@ pybedtools.contrib.plotting.Track
~Track.convert_yunits
~Track.draw
~Track.findobj
+ ~Track.format_cursor_data
~Track.get_agg_filter
~Track.get_alpha
~Track.get_animated
@@ -37,6 +38,7 @@ pybedtools.contrib.plotting.Track
~Track.get_clip_path
~Track.get_cmap
~Track.get_contains
+ ~Track.get_cursor_data
~Track.get_dashes
~Track.get_datalim
~Track.get_edgecolor
@@ -44,6 +46,7 @@ pybedtools.contrib.plotting.Track
~Track.get_facecolor
~Track.get_facecolors
~Track.get_figure
+ ~Track.get_fill
~Track.get_gid
~Track.get_hatch
~Track.get_label
@@ -95,7 +98,6 @@ pybedtools.contrib.plotting.Track
~Track.set_clip_path
~Track.set_cmap
~Track.set_color
- ~Track.set_colorbar
~Track.set_contains
~Track.set_dashes
~Track.set_edgecolor
@@ -110,7 +112,6 @@ pybedtools.contrib.plotting.Track
~Track.set_linestyles
~Track.set_linewidth
~Track.set_linewidths
- ~Track.set_lod
~Track.set_lw
~Track.set_norm
~Track.set_offset_position
@@ -127,6 +128,7 @@ pybedtools.contrib.plotting.Track
~Track.set_url
~Track.set_urls
~Track.set_verts
+ ~Track.set_verts_and_codes
~Track.set_visible
~Track.set_zorder
~Track.to_rgba
@@ -143,7 +145,11 @@ pybedtools.contrib.plotting.Track
.. autosummary::
~Track.aname
+ ~Track.axes
~Track.midpoint
+ ~Track.mouseover
+ ~Track.stale
+ ~Track.sticky_edges
~Track.zorder
\ No newline at end of file
diff --git a/docs/source/changes.rst b/docs/source/changes.rst
index 6339a03..b805cc4 100644
--- a/docs/source/changes.rst
+++ b/docs/source/changes.rst
@@ -2,6 +2,34 @@
Changelog
=========
+Changes in v0.7.10
+------------------
+Various bug fixes and some minor feature additions:
+
+* Support for comma-separated lists for `mapBed` (thanks Chuan-Sheng Foo)
+* Support many calls to `tabix_intervals` without hitting a "Too many open
+ files" error (`#190 <https://github.com/daler/pybedtools/issues/190>`_)
+* Clarify documentation for `genome_coverage` when used with default
+ parameters (`#113 <https://github.com/daler/pybedtools/issues/113>`_)
+* Ignore stderr from samtools on older zlib versions (`#209 <https://github.com/daler/pybedtools/issues/209>`_, thanks Gert Hulselmans)
+* Support fetching all regions from a chromosome (`#201 <https://github.com/daler/pybedtools/issues/201>`_, thanks Matt Stone)
+* Add wrapper for `shiftBed` (`#200 <https://github.com/daler/pybedtools/issues/200>`_, thanks Saket Choudhary)
+* Fix `truncate_to_chrom` in Python 3 (`#203 <https://github.com/daler/pybedtools/issues/203>`_, thanks Saulius Lukauskas)
+* When making bigWigs, use `bedSort` to ensure the sort order matches that expected by UCSC tools (`#178 <https://github.com/daler/pybedtools/issues/178>`_)
+* Fix newline handling of `pysam.ctabix.TabixIterator` output (`#196 <https://github.com/daler/pybedtools/issues/196>`_)
+
+
+Changes in v0.7.9
+-----------------
+Minor bugfix release:
+
+* add `contrib.bigwig.bigwigtobedgraph` (thanks Justin Fear)
+* fix `BedTool.seq()` in Python 3
+* fix intron creation (`#182 <https://github.com/daler/pybedtools/pull/182>`_, thanks @mmendez12)
+* add `six` as an explicit requirement (`#184 <https://github.com/daler/pybedtools/pull/184>`_, thanks @jrdemasi)
+* improvements to setup (`<https://github.com/daler/pybedtools/issues/185>`_)
+* make pandas fully optional
+
Changes in v0.7.8
-----------------
* Be more careful about BAM vs bgzipped files (#168)
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 4e71b53..b372b73 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -141,7 +141,7 @@ html_use_smartypants = False
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
-#html_static_path = ['_static']
+html_static_path = ['_static']
# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
# using the given strftime format.
diff --git a/docs/source/topical-saving.rst b/docs/source/topical-saving.rst
index 1e91924..df0d3c8 100644
--- a/docs/source/topical-saving.rst
+++ b/docs/source/topical-saving.rst
@@ -69,22 +69,33 @@ a 2-column file containing the number of intervals in each featuretype:
The file created by `d` looks like this:
+(Note: the latest version of BEDTools, v2.26.0, causes this to fail. This will
+be fixed in the next BEDTools release; see
+https://github.com/arq5x/bedtools2/issues/453,
+https://github.com/arq5x/bedtools2/issues/450,
+https://github.com/arq5x/bedtools2/issues/435, and
+https://github.com/arq5x/bedtools2/issues/436 for details.)
+
.. doctest::
:options: +NORMALIZE_WHITESPACE
+ >>> # bedtools v2.26.0
>>> print(open(d.fn).read())
- UTR 0
- CDS 2
- intron 4
- CDS 0
- UTR 1
- exon 3
- mRNA 7
- CDS 2
- exon 2
- tRNA 2
- gene 7
- <BLANKLINE>
+ 30
+
+ >>> # bedtools != v2.26.0
+ >>> # UTR 0
+ >>> # CDS 2
+ >>> # intron 4
+ >>> # CDS 0
+ >>> # UTR 1
+ >>> # exon 3
+ >>> # mRNA 7
+ >>> # CDS 2
+ >>> # exon 2
+ >>> # tRNA 2
+ >>> # gene 7
+ >>> # <BLANKLINE>
Trying to iterate over `d` (`[i for i in d]`) or save it (`d.saveas()`) raises
@@ -103,12 +114,14 @@ argument instead of `saveas()`, like this:
.. doctest::
- >>> d = c.groupby(g=[3], c=10, o=['sum'], output='counts.txt')
+ >>> # only works with bedtools != v2.26.0
+ >>> # d = c.groupby(g=[3], c=10, o=['sum'], output='counts.txt')
To iterate over the lines of the file, you can use standard Python
tools, e.g.:
.. doctest::
- >>> for line in open(d.fn):
- ... featuretype, count = line.strip().split()
+ >>> # only works with bedtools != v2.26.0
+ >>> # for line in open(d.fn):
+ >>> # featuretype, count = line.strip().split()
diff --git a/optional-requirements.txt b/optional-requirements.txt
new file mode 100644
index 0000000..c5d2f9f
--- /dev/null
+++ b/optional-requirements.txt
@@ -0,0 +1,6 @@
+bedtools
+matplotlib
+htslib <1.4
+ucsc-bigwigtobedgraph
+ucsc-bedgraphtobigwig
+ucsc-wigtobigwig
diff --git a/pybedtools/bedtool.py b/pybedtools/bedtool.py
index b01862b..0f1cd8d 100644
--- a/pybedtools/bedtool.py
+++ b/pybedtools/bedtool.py
@@ -529,7 +529,10 @@ class BedTool(object):
The fields of the resulting BedTool will match the order of columns in
the dataframe.
"""
- import pandas
+ try:
+ import pandas
+ except ImportError:
+ raise ImportError("pandas must be installed to use dataframes")
if outfile is None:
outfile = self._tmp()
default_kwargs = dict(sep='\t', header=False, index=False)
@@ -589,7 +592,7 @@ class BedTool(object):
chromdict = helpers.chromsizes(genome)
tmp = self._tmp()
- fout = open(tmp, 'wb')
+ fout = open(tmp, 'w')
for chrom, coords in list(chromdict.items()):
start, stop = coords
start = str(start)
@@ -600,7 +603,7 @@ class BedTool(object):
def tabix_intervals(self, interval_or_string):
"""
- Retrieve all intervals within cooridnates from a "tabixed" BedTool.
+ Retrieve all intervals within coordinates from a "tabixed" BedTool.
Given either a string in "chrom:start-stop" format, or an interval-like
object with chrom, start, stop attributes, return a *streaming* BedTool
@@ -614,10 +617,38 @@ class BedTool(object):
# tabix expects 1-based coords, but BEDTools works with
# zero-based. pybedtools and pysam also work with zero-based. So we can
# pass zero-based directly to the pysam tabix interface.
- interval = helpers.string_to_interval(interval_or_string)
tbx = pysam.TabixFile(self.fn)
- results = tbx.fetch(str(interval.chrom), interval.start, interval.stop)
- return BedTool(results)
+
+ # If an interval is passed, use its coordinates directly
+ if isinstance(interval_or_string, Interval):
+ interval = interval_or_string
+ chrom, start, end = interval.chrom, interval.start, interval.stop
+ # Parse string directly instead of relying on Interval, in order to
+ # permit full chromosome fetching
+ else:
+ match = helpers.coord_re.search(interval_or_string)
+ # Assume string is contig if it doesn't fit chrom:start-end format
+ if match is None:
+ chrom = interval_or_string
+ start, end = None, None
+ # Otherwise parse the coordinates
+ else:
+ chrom, start, end = match.group(1, 2, 3)
+ start, end = int(start), int(end)
+
+ # Fetch results.
+ results = tbx.fetch(str(chrom), start, end)
+
+ # pysam.ctabix.TabixIterator does not include newlines when yielding so
+ # we need to add them.
+ def gen():
+ for i in results:
+ yield i + '\n'
+
+ # xref #190
+ x = BedTool(gen()).saveas()
+ tbx.close()
+ return x
def tabix(self, in_place=True, force=False, is_sorted=False):
"""
@@ -890,8 +921,8 @@ class BedTool(object):
# iterate over all the features in the gene.
s = self.sort()
if self.file_type == "gff":
- exon_iter = BedTool((f for f in s if f[2] == gene)).saveas()
- gene_iter = BedTool((f for f in s if f[2] == exon)).saveas()
+ exon_iter = BedTool((f for f in s if f[2] == exon)).saveas()
+ gene_iter = BedTool((f for f in s if f[2] == gene)).saveas()
elif self.file_type == "bed":
if s.field_count() == 12:
@@ -1245,6 +1276,7 @@ class BedTool(object):
'multiIntersectBed': ' ',
'mergeBed': ',',
'intersectBed': ' ',
+ 'mapBed': ',',
}
stdin = None
@@ -1680,7 +1712,6 @@ class BedTool(object):
'GATGAGTCT'
>>> BedTool.seq(('chr1', 1, 10), fn)
'GATGAGTCT'
-
"""
if isinstance(loc, six.string_types):
chrom, start_end = loc.split(":")
@@ -1692,7 +1723,7 @@ class BedTool(object):
loc = BedTool("%s\t%i\t%i" % (chrom, start, end), from_string=True)
lseq = loc.sequence(fi=fasta)
return "".join(
- [l.rstrip() for l in open(lseq.seqfn, 'rb')
+ [l.rstrip() for l in open(lseq.seqfn, 'r')
if l[0] != ">"])
@_log_to_history
@@ -1753,6 +1784,51 @@ class BedTool(object):
"""
@_log_to_history
+ @_wraps(prog='shiftBed', implicit='i', other=None, bam=None,
+ uses_genome=True)
+ def shift(self):
+ """
+ Wraps `bedtools shift`.
+
+ Shift each feature by user-defined number of bases. Returns a new BedTool object.
+
+ Example usage:
+
+ >>> a = pybedtools.example_bedtool('a.bed')
+
+ Shift every feature by 5bp:
+
+ >>> b = a.shift(genome='hg19', s=5)
+ >>> print(b) #doctest: +NORMALIZE_WHITESPACE
+ chr1 6 105 feature1 0 +
+ chr1 105 205 feature2 0 +
+ chr1 155 505 feature3 0 -
+ chr1 905 955 feature4 0 +
+ <BLANKLINE>
+
+ Shift features on the '+' strand by -1bp and on '-' strand by +3bp:
+
+ >>> b = a.shift(genome='hg19', p=-1, m=3)
+ >>> print(b) #doctest: +NORMALIZE_WHITESPACE
+ chr1 0 99 feature1 0 +
+ chr1 99 199 feature2 0 +
+ chr1 153 503 feature3 0 -
+ chr1 899 949 feature4 0 +
+ <BLANKLINE>
+
+ Shift features by a fraction of their length (0.50):
+
+ >>> b = a.shift(genome='hg19', pct=True, s=0.50)
+ >>> print(b) #doctest: +NORMALIZE_WHITESPACE
+ chr1 50 149 feature1 0 +
+ chr1 150 250 feature2 0 +
+ chr1 325 675 feature3 0 -
+ chr1 925 975 feature4 0 +
+ <BLANKLINE>
+
+ """
+
+ @_log_to_history
@_wraps(prog='mergeBed', implicit='i', other=None, bam=None)
def merge(self):
"""
@@ -1920,6 +1996,14 @@ class BedTool(object):
"""
Wraps `bedtools genomecov`.
+ Note that some invocations of `bedtools genomecov` do not result in
+ a properly-formatted BED file. For example, the default behavior is to
+ report a histogram of coverage. Iterating over the resulting,
+ non-BED-format file will raise exceptions in pybedtools' parser.
+
+ Consider using the `BedTool.to_dataframe` method to convert these
+ non-BED files into a pandas DataFrame for further use.
+
Example usage:
BAM file input does not require a genome:
@@ -1940,6 +2024,10 @@ class BedTool(object):
chr2L 10212 10248 1
chr2L 10255 10291 1
+ Non-BED format results:
+ >>> a = pybedtools.example_bedtool('x.bed')
+ >>> b = a.genome_coverage(genome='dm3')
+ >>> df = b.to_dataframe(names=['chrom', 'depth', 'n', 'chromsize', 'fraction'])
"""
diff --git a/pybedtools/contrib/bigwig.py b/pybedtools/contrib/bigwig.py
index 907701a..ab52202 100644
--- a/pybedtools/contrib/bigwig.py
+++ b/pybedtools/contrib/bigwig.py
@@ -27,7 +27,7 @@ def mapped_read_count(bam, force=False):
p = subprocess.Popen(cmds, stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
stdout, stderr = p.communicate()
- if stderr:
+ if p.returncode:
raise ValueError('samtools says: %s' % stderr)
readcount = float(stdout)
@@ -48,10 +48,38 @@ def bedgraph_to_bigwig(bedgraph, genome, output):
bedgraph.fn,
genome_file,
output]
- os.system(' '.join(cmds))
+ p = subprocess.Popen(cmds, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ stdout, stderr = p.communicate()
+ if p.returncode:
+ raise ValueError("cmds: %s\nstderr:%s\nstdout:%s"
+ % (" ".join(cmds), stderr, stdout))
return output
+def bigwig_to_bedgraph(fn, chrom=None, start=None, end=None, udcDir=None):
+ cmds = [
+ 'bigWigToBedGraph',
+ fn]
+ if chrom is not None:
+ cmds.extend(['-chrom', chrom])
+ if start is not None:
+ cmds.extend(['-start', start])
+ if end is not None:
+ cmds.extend(['-end', end])
+ if udcDir is not None:
+ cmds.extend(['-udcDir', udcDir])
+
+ outfn = pybedtools.BedTool._tmp()
+ cmds.append(outfn)
+
+ p = subprocess.Popen(cmds, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ stdout, stderr = p.communicate()
+ if p.returncode:
+ raise ValueError("cmds: %s\nstderr:%s\nstdout:%s"
+ % (" ".join(cmds), stderr, stdout))
+ return pybedtools.BedTool(outfn)
+
+
def wig_to_bigwig(wig, genome, output):
genome_file = pybedtools.chromsizes_to_file(pybedtools.chromsizes(genome))
cmds = [
@@ -59,7 +87,11 @@ def wig_to_bigwig(wig, genome, output):
wig.fn,
genome_file,
output]
- os.system(' '.join(cmds))
+ subprocess.Popen(cmds, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ stdout, stderr = p.communicate()
+ if p.returncode:
+ raise ValueError('cmds: %s\nstderr:%s\nstdout:%s'
+ % (' '.join(cmds), stderr, stdout))
return output
@@ -88,4 +120,16 @@ def bam_to_bigwig(bam, genome, output, scale=False):
x.fn,
genome_file,
output]
- os.system(' '.join(cmds))
+ p = subprocess.Popen(cmds, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)
+ stdout, stderr = p.communicate()
+
+ if p.returncode and 'bedSort' in stderr:
+ print('BAM header was not sorted; sorting bedGraph')
+ y = x.sort()
+ cmds[1] = y.fn
+ p = subprocess.Popen(cmds, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)
+ stdout, stderr = p.communicate()
+
+ if p.returncode:
+ raise ValueError('cmds: %s\nstderr: %s\nstdout: %s'
+ % (' '.join(cmds), stderr, stdout))
diff --git a/pybedtools/contrib/long_range_interaction.py b/pybedtools/contrib/long_range_interaction.py
index 38d1249..8f8eb12 100644
--- a/pybedtools/contrib/long_range_interaction.py
+++ b/pybedtools/contrib/long_range_interaction.py
@@ -4,8 +4,6 @@ import sys
import itertools
import six
import time
-import numpy as np
-import pandas
import pysam
import pybedtools
@@ -335,6 +333,10 @@ def cis_trans_interactions(iterator, n, extra, verbose=True):
set([u'gene1', u'peak1'])
"""
+ try:
+ import pandas
+ except ImportError:
+ raise ImportError("pandas must be installed to use this function")
c = 0
lines = []
for label, end1_hits, end2_hits in iterator:
diff --git a/pybedtools/settings.py b/pybedtools/settings.py
index 6fcde9c..07ec79e 100644
--- a/pybedtools/settings.py
+++ b/pybedtools/settings.py
@@ -31,6 +31,7 @@ _prog_names = {
'complementBed': 'complement',
'subtractBed': 'subtract',
'slopBed': 'slop',
+ 'shiftBed': 'shift',
'flankBed': 'flank',
'sortBed': 'sort',
'randomBed': 'random',
diff --git a/pybedtools/test/data/gdc.othersort.bam b/pybedtools/test/data/gdc.othersort.bam
new file mode 100644
index 0000000..a0ba01e
Binary files /dev/null and b/pybedtools/test/data/gdc.othersort.bam differ
diff --git a/pybedtools/test/mpl-expected.png b/pybedtools/test/mpl-expected.png
index ba92877..f069f0c 100644
Binary files a/pybedtools/test/mpl-expected.png and b/pybedtools/test/mpl-expected.png differ
diff --git a/pybedtools/test/test1.py b/pybedtools/test/test1.py
index 8341747..4ad8bc2 100644
--- a/pybedtools/test/test1.py
+++ b/pybedtools/test/test1.py
@@ -150,6 +150,9 @@ def test_tabix_intervals():
assert len(a.tabix_intervals('chr1:30-35[-]')) == 0
assert len(a.tabix_intervals('chr1:29-30[-]')) == 1
+ # permit fetching of a contig without a specified region
+ assert len(a.tabix_intervals('chr1')) == 1
+
# ----------------------------------------------------------------------------
# Streaming and non-file BedTool tests
# ----------------------------------------------------------------------------
@@ -1952,50 +1955,55 @@ def test_issue_156():
# that lists of filenames works.
a = pybedtools.example_bedtool('a.bed')
b = [pybedtools.example_filename('b.bed'), pybedtools.example_filename('c.gff')]
- assert str(a.intersect(b)) == fix(
+ res = str(a.intersect(b))
+ assert res == fix(
"""
- chr1 60 100 feature1 0 +
- chr1 155 200 feature2 0 +
- chr1 174 200 feature2 0 +
- chr1 174 200 feature2 0 +
- chr1 100 200 feature2 0 +
- chr1 155 200 feature3 0 -
- chr1 465 500 feature3 0 -
- chr1 486 500 feature3 0 -
- chr1 174 326 feature3 0 -
- chr1 439 500 feature3 0 -
- chr1 496 500 feature3 0 -
- chr1 486 500 feature3 0 -
- chr1 174 326 feature3 0 -
- chr1 439 500 feature3 0 -
- chr1 150 269 feature3 0 -
- chr1 900 901 feature4 0 +
- chr1 900 913 feature4 0 +
- chr1 900 913 feature4 0 +
- chr1 900 950 feature4 0 +
- """)
- assert str(a.intersect(b, wb=True, names=['B', 'C'])) == fix(
+ chr1 59 100 feature1 0 +
+ chr1 155 200 feature2 0 +
+ chr1 173 200 feature2 0 +
+ chr1 173 200 feature2 0 +
+ chr1 100 200 feature2 0 +
+ chr1 155 200 feature3 0 -
+ chr1 464 500 feature3 0 -
+ chr1 485 500 feature3 0 -
+ chr1 173 326 feature3 0 -
+ chr1 438 500 feature3 0 -
+ chr1 495 500 feature3 0 -
+ chr1 485 500 feature3 0 -
+ chr1 173 326 feature3 0 -
+ chr1 438 500 feature3 0 -
+ chr1 150 269 feature3 0 -
+ chr1 900 901 feature4 0 +
+ chr1 900 913 feature4 0 +
+ chr1 900 913 feature4 0 +
+ chr1 900 950 feature4 0 +
+ """), res
+
+ res = str(a.intersect(b, wb=True, names=['B', 'C']))
+ assert res == fix(
"""
- chr1 60 100 feature1 0 + C chr1 ucb gene 60 269 . - . ID=thaliana_1_6160_6269;match=fgenesh1_pg.C_scaffold_1000119;rname=thaliana_1_6160_6269
+ chr1 59 100 feature1 0 + C chr1 ucb gene 60 269 . - . ID=thaliana_1_6160_6269;match=fgenesh1_pg.C_scaffold_1000119;rname=thaliana_1_6160_6269
chr1 155 200 feature2 0 + B chr1 155 200 feature5 0 -
- chr1 174 200 feature2 0 + C chr1 ucb CDS 174 326 . + . Parent=AT1G01010.mRNA;rname=AT1G01010
- chr1 174 200 feature2 0 + C chr1 ucb mRNA 174 326 . + . ID=AT1G01010.mRNA;Parent=AT1G01010;rname=AT1G01010
+ chr1 173 200 feature2 0 + C chr1 ucb CDS 174 326 . + . Parent=AT1G01010.mRNA;rname=AT1G01010
+ chr1 173 200 feature2 0 + C chr1 ucb mRNA 174 326 . + . ID=AT1G01010.mRNA;Parent=AT1G01010;rname=AT1G01010
chr1 100 200 feature2 0 + C chr1 ucb gene 60 269 . - . ID=thaliana_1_6160_6269;match=fgenesh1_pg.C_scaffold_1000119;rname=thaliana_1_6160_6269
chr1 155 200 feature3 0 - B chr1 155 200 feature5 0 -
- chr1 465 500 feature3 0 - C chr1 ucb gene 465 805 . + . ID=thaliana_1_465_805;match=scaffold_801404.1;rname=thaliana_1_465_805
- chr1 486 500 feature3 0 - C chr1 ucb CDS 486 605 . + . Parent=AT1G01010.mRNA;rname=AT1G01010
- chr1 174 326 feature3 0 - C chr1 ucb CDS 174 326 . + . Parent=AT1G01010.mRNA;rname=AT1G01010
- chr1 439 500 feature3 0 - C chr1 ucb CDS 439 630 . + . Parent=AT1G01010.mRNA;rname=AT1G01010
- chr1 496 500 feature3 0 - C chr1 ucb mRNA 496 576 . + . ID=AT1G01010.mRNA;Parent=AT1G01010;rname=AT1G01010
- chr1 486 500 feature3 0 - C chr1 ucb mRNA 486 605 . + . ID=AT1G01010.mRNA;Parent=AT1G01010;rname=AT1G01010
- chr1 174 326 feature3 0 - C chr1 ucb mRNA 174 326 . + . ID=AT1G01010.mRNA;Parent=AT1G01010;rname=AT1G01010
- chr1 439 500 feature3 0 - C chr1 ucb mRNA 439 899 . + . ID=AT1G01010.mRNA;Parent=AT1G01010;rname=AT1G01010
+ chr1 464 500 feature3 0 - C chr1 ucb gene 465 805 . + . ID=thaliana_1_465_805;match=scaffold_801404.1;rname=thaliana_1_465_805
+ chr1 485 500 feature3 0 - C chr1 ucb CDS 486 605 . + . Parent=AT1G01010.mRNA;rname=AT1G01010
+ chr1 173 326 feature3 0 - C chr1 ucb CDS 174 326 . + . Parent=AT1G01010.mRNA;rname=AT1G01010
+ chr1 438 500 feature3 0 - C chr1 ucb CDS 439 630 . + . Parent=AT1G01010.mRNA;rname=AT1G01010
+ chr1 495 500 feature3 0 - C chr1 ucb mRNA 496 576 . + . ID=AT1G01010.mRNA;Parent=AT1G01010;rname=AT1G01010
+ chr1 485 500 feature3 0 - C chr1 ucb mRNA 486 605 . + . ID=AT1G01010.mRNA;Parent=AT1G01010;rname=AT1G01010
+ chr1 173 326 feature3 0 - C chr1 ucb mRNA 174 326 . + . ID=AT1G01010.mRNA;Parent=AT1G01010;rname=AT1G01010
+ chr1 438 500 feature3 0 - C chr1 ucb mRNA 439 899 . + . ID=AT1G01010.mRNA;Parent=AT1G01010;rname=AT1G01010
chr1 150 269 feature3 0 - C chr1 ucb gene 60 269 . - . ID=thaliana_1_6160_6269;match=fgenesh1_pg.C_scaffold_1000119;rname=thaliana_1_6160_6269
chr1 900 901 feature4 0 + B chr1 800 901 feature6 0 +
chr1 900 913 feature4 0 + C chr1 ucb mRNA 631 913 . + . ID=AT1G01010.mRNA;Parent=AT1G01010;rname=AT1G01010
chr1 900 913 feature4 0 + C chr1 ucb CDS 760 913 . + . Parent=AT1G01010.mRNA;rname=AT1G01010
chr1 900 950 feature4 0 + C chr1 ucb CDS 706 1095 . + . Parent=AT1G01010.mRNA;rname=AT1G01010
- """)
+ """), res
+
+
def test_issue_157():
# the problem here was that converting to file from dataframe didn't pass
@@ -2066,3 +2074,48 @@ def test_issue_169():
line = gzip.open(fn, 'rt').readline()
assert str(line).startswith('#'), line
+def test_issue_196():
+ bed = pybedtools.BedTool(
+ '''
+ 8 129185980 129186130 A 0.1
+ 8 129185980 129186130 B 0.2
+ ''', from_string=True)
+ bed = bed.tabix()
+ snp = pybedtools.BedTool("8\t129186110\t129186111\trs72722756", from_string=True)
+ intersection = bed.tabix_intervals('{}:{}-{}'.format("8",129186110,129186111)).intersect(snp, wa=True, wb=True)
+
+ # prior to fixing this issue, intervals would be concatenated. This was
+ # because pysam.ctabix.tabixIterator does not include newlines when
+ # yielding. The incorrect output was this:
+ '''
+ 8 129185980 129186130 A 0.18 129185980 129186130 B 0.2 8 129186110 129186111 rs72722756
+ '''
+
+ # but should be this:
+ assert intersection == fix(
+ '''
+ 8 129185980 129186130 A 0.1 8 129186110 129186111 rs72722756
+ 8 129185980 129186130 B 0.2 8 129186110 129186111 rs72722756
+ ''')
+
+
+def test_issue_178():
+ fn = pybedtools.example_filename('gdc.othersort.bam')
+ pybedtools.contrib.bigwig.bam_to_bigwig(fn, genome='dm3', output='tmp.bw')
+ x = pybedtools.contrib.bigwig.bigwig_to_bedgraph('tmp.bw')
+ assert x == fix(
+ '''
+ chr2L 70 75 1
+ chr2L 140 145 1
+ chr2L 150 155 1
+ chr2L 160 165 1
+ chr2L 210 215 1
+ chrX 10 15 1
+ chrX 70 75 1
+ chrX 140 145 1
+ ''')
+ os.unlink('tmp.bw')
+
+def test_issue_203():
+ x = pybedtools.example_bedtool('x.bed')
+ x.truncate_to_chrom(genome='hg19')
diff --git a/pybedtools/version.py b/pybedtools/version.py
index 42b6cef..84cbbc1 100644
--- a/pybedtools/version.py
+++ b/pybedtools/version.py
@@ -1,11 +1,11 @@
# THIS FILE IS GENERATED FROM SETUP.PY
-short_version = '0.7.8'
-version = '0.7.8'
-full_version = '0.7.8'
-git_revision = '1c8e7c95f8c6e1ac420b69d3f637438f71fc99f1'
+short_version = '0.7.10'
+version = '0.7.10'
+full_version = '0.7.10'
+git_revision = 'e082757cb02f6abac7fe7cb31a31e92e5282e3e6'
release = True
__version__ = version
if not release:
- version = full_version
\ No newline at end of file
+ version = full_version
diff --git a/requirements.txt b/requirements.txt
index a67cdbe..bbbbe36 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,5 @@
-numpydoc
-pyyaml
cython
+numpy
+pandas
+pysam
+six
diff --git a/setup.py b/setup.py
index 04f714f..b84a272 100644
--- a/setup.py
+++ b/setup.py
@@ -29,19 +29,13 @@ try:
from ez_setup import use_setuptools
use_setuptools(version="0.6c5")
from setuptools import setup, Command, find_packages
- _have_setuptools = True
except ImportError:
- # no setuptools installed
- from distutils.core import setup, Command
- _have_setuptools = False
+ sys.exit(
+ 'pybedtools uses setuptools (https://packaging.python.org/installing/) '
+ 'for installation but setuptools was not found')
-if _have_setuptools:
- setuptools_kwargs = {"zip_safe": False,
- "test_suite": "nose.collector"}
-else:
- setuptools_kwargs = {}
- if sys.version_info[0] >= 3:
- sys.exit("Need setuptools to install pybedtools for Python 3.x")
+setuptools_kwargs = {"zip_safe": False,
+ "test_suite": "nose.collector"}
curdir = os.path.abspath(os.path.dirname(__file__))
@@ -119,6 +113,17 @@ def check_dependency_versions(min_versions):
raise ImportError("Pysam version is %s. Requires >= %s" %
(pysam_version, min_versions['pysam']))
+ if 'numpy' in min_versions:
+ try:
+ from numpy import __version__ as numpy_version
+ except ImportError:
+ install_requires.append('numpy')
+ else:
+ if not (LooseVersion(numpy_version) >= min_versions['numpy']):
+ raise ImportError("numpy version is %s. Requires >= %s" %
+ (numpy_version, min_versions['numpy']))
+
+
if 'pandas' in min_versions:
try:
from pandas import __version__ as pandas_version
@@ -134,7 +139,7 @@ def check_dependency_versions(min_versions):
MAJ = 0
MIN = 7
-REV = 8
+REV = 9
ISRELEASED = True
VERSION = '%d.%d.%d' % (MAJ, MIN, REV)
@@ -317,13 +322,13 @@ for name, data in ext_data.items():
if __name__ == "__main__":
min_versions = {
'pysam': '0.8.1',
+ #'pandas': '0.16',
}
(setup_requires,
install_requires) = check_dependency_versions(min_versions)
- if _have_setuptools:
- setuptools_kwargs['setup_requires'] = setup_requires
- setuptools_kwargs['install_requires'] = install_requires
- write_version_py()
+ setuptools_kwargs['setup_requires'] = setup_requires
+ setuptools_kwargs['install_requires'] = install_requires
+ write_version_py()
cwd = os.path.abspath(os.path.dirname(__file__))
if not os.path.exists(os.path.join(cwd, 'PKG-INFO')) and not no_frills:
diff --git a/requirements.txt b/test-requirements.txt
similarity index 57%
copy from requirements.txt
copy to test-requirements.txt
index a67cdbe..8ffeda9 100644
--- a/requirements.txt
+++ b/test-requirements.txt
@@ -1,3 +1,4 @@
+nose
numpydoc
pyyaml
-cython
+sphinx
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/python-pybedtools.git
More information about the debian-med-commit
mailing list