[med-svn] [python-xopen] 01/06: New upstream version 0.3.2
Andreas Tille
tille at debian.org
Sat Feb 10 12:35:01 UTC 2018
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository python-xopen.
commit 1cbdbf4f69dfb0e144d6e8836044557758ded3e3
Author: Andreas Tille <tille at debian.org>
Date: Sat Feb 10 13:27:16 2018 +0100
New upstream version 0.3.2
---
.travis.yml | 3 +-
README.rst | 21 ++--
setup.cfg | 2 -
setup.py | 32 +++---
tests/file.txt.bz2 | Bin 71 -> 118 bytes
tests/hello.gz | Bin 0 -> 25 bytes
tests/{testxopen.py => test_xopen.py} | 58 +++++++++--
tox.ini | 2 +-
xopen.py | 188 ++++++++++++++++++----------------
9 files changed, 189 insertions(+), 117 deletions(-)
diff --git a/.travis.yml b/.travis.yml
index 15895bb..311b5ae 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -4,15 +4,14 @@ cache:
directories:
- $HOME/.cache/pip
python:
- - "2.6"
- "2.7"
- "3.3"
- "3.4"
- "3.5"
+ - "3.6"
install:
- pip install .
script:
- nosetests -P tests
-
diff --git a/README.rst b/README.rst
index 248b9dd..96a4164 100644
--- a/README.rst
+++ b/README.rst
@@ -8,38 +8,47 @@
xopen
=====
-This small Python module provides a ``xopen`` function that works like the
+This small Python module provides an ``xopen`` function that works like the
built-in ``open`` function, but can also deal with compressed files.
Supported compression formats are gzip, bzip2 and xz. They are automatically
recognized by their file extensions `.gz`, `.bz2` or `.xz`.
The focus is on being as efficient as possible on all supported Python versions.
-For example, simply using ``gzip.open`` is slow in older Pythons, and it is
-a lot faster to use a ``gzip`` subprocess.
+For example, simply using ``gzip.open`` is very slow in older Pythons, and
+it is a lot faster to use a ``gzip`` subprocess. For writing to gzip files,
+``xopen`` uses ``pigz`` when available.
This module has originally been developed as part of the `cutadapt
tool <https://cutadapt.readthedocs.io/>`_ that is used in bioinformatics to
manipulate sequencing data. It has been in successful use within that software
for a few years.
+``xopen`` is compatible with Python 2.7, 3.3, 3.4, 3.5 and 3.6.
+
Usage
-----
Open a file for reading::
- with open('file.txt.xz') as f:
+ from xopen import xopen
+
+ with xopen('file.txt.xz') as f:
content = f.read()
Or without context manager::
- f = open('file.txt.xz')
+ from xopen import xopen
+
+ f = xopen('file.txt.xz')
content = f.read()
f.close()
Open a file for writing::
- with open('file.txt.gz', mode='w') as f:
+ from xopen import xopen
+
+ with xopen('file.txt.gz', mode='w') as f:
f.write('Hello')
diff --git a/setup.cfg b/setup.cfg
deleted file mode 100644
index 3c6e79c..0000000
--- a/setup.cfg
+++ /dev/null
@@ -1,2 +0,0 @@
-[bdist_wheel]
-universal=1
diff --git a/setup.py b/setup.py
index 13fccc8..ea3ddf1 100644
--- a/setup.py
+++ b/setup.py
@@ -1,31 +1,37 @@
import sys
from setuptools import setup
-if sys.version_info < (2, 6):
- sys.stdout.write("At least Python 2.6 is required.\n")
+if sys.version_info < (2, 7):
+ sys.stdout.write("At least Python 2.7 is required.\n")
sys.exit(1)
with open('README.rst') as f:
long_description = f.read()
+if sys.version_info < (3, ):
+ requires = ['bz2file']
+else:
+ requires = []
+
setup(
- name = 'xopen',
- version = '0.1.1',
- author = 'Marcel Martin',
- author_email = 'mail at marcelm.net',
- url = 'https://github.com/marcelm/xopen/',
- description = 'Open compressed files transparently',
- long_description = long_description,
- license = 'MIT',
- py_modules = ['xopen'],
- classifiers = [
+ name='xopen',
+ version='0.3.2',
+ author='Marcel Martin',
+ author_email='mail at marcelm.net',
+ url='https://github.com/marcelm/xopen/',
+ description='Open compressed files transparently',
+ long_description=long_description,
+ license='MIT',
+ py_modules=['xopen'],
+ install_requires=requires,
+ classifiers=[
"Development Status :: 4 - Beta",
"License :: OSI Approved :: MIT License",
- "Programming Language :: Python :: 2.6",
"Programming Language :: Python :: 2.7",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.3",
"Programming Language :: Python :: 3.4",
"Programming Language :: Python :: 3.5",
+ "Programming Language :: Python :: 3.6",
]
)
diff --git a/tests/file.txt.bz2 b/tests/file.txt.bz2
index 82a5dcc..defbf7d 100644
Binary files a/tests/file.txt.bz2 and b/tests/file.txt.bz2 differ
diff --git a/tests/hello.gz b/tests/hello.gz
new file mode 100644
index 0000000..73227c4
Binary files /dev/null and b/tests/hello.gz differ
diff --git a/tests/testxopen.py b/tests/test_xopen.py
similarity index 76%
rename from tests/testxopen.py
rename to tests/test_xopen.py
index c0ba78e..ba04eee 100644
--- a/tests/testxopen.py
+++ b/tests/test_xopen.py
@@ -7,7 +7,7 @@ import sys
import signal
from contextlib import contextmanager
from nose.tools import raises
-from xopen import xopen
+from xopen import xopen, PipedGzipReader
base = "tests/file.txt"
@@ -18,6 +18,10 @@ try:
except ImportError:
lzma = None
+try:
+ import bz2
+except ImportError:
+ bz2 = None
major, minor = sys.version_info[0:2]
@@ -119,19 +123,24 @@ if lzma:
def test_append():
- for ext in ["", ".gz"]: # BZ2 does NOT support append
- text = "AB"
- if ext != "":
- text = text.encode("utf-8") # On Py3, need to send BYTES, not unicode
+ cases = ["", ".gz"]
+ if bz2 and sys.version_info > (3,):
+ # BZ2 does NOT support append in Py 2.
+ cases.append(".bz2")
+ if lzma:
+ cases.append(".xz")
+ for ext in cases:
+ # On Py3, need to send BYTES, not unicode. Let's do it for all.
+ text = "AB".encode("utf-8")
reference = text + text
with temporary_path('truncated.fastq' + ext) as path:
try:
os.unlink(path)
except OSError:
pass
- with xopen(path, 'a') as f:
+ with xopen(path, 'ab') as f:
f.write(text)
- with xopen(path, 'a') as f:
+ with xopen(path, 'ab') as f:
f.write(text)
with xopen(path, 'r') as f:
for appended in f:
@@ -143,6 +152,31 @@ def test_append():
assert appended == reference
+def test_append_text():
+ cases = ["", ".gz"]
+ if bz2 and sys.version_info > (3,):
+ # BZ2 does NOT support append in Py 2.
+ cases.append(".bz2")
+ if lzma:
+ cases.append(".xz")
+ for ext in cases: # .bz2 is included only on Py 3 (no BZ2 append in Py 2)
+ text = "AB"
+ reference = text + text
+ with temporary_path('truncated.fastq' + ext) as path:
+ try:
+ os.unlink(path)
+ except OSError:
+ pass
+ with xopen(path, 'at') as f:
+ f.write(text)
+ with xopen(path, 'at') as f:
+ f.write(text)
+ with xopen(path, 'rt') as f:
+ for appended in f:
+ pass
+ assert appended == reference
+
+
def create_truncated_file(path):
# Random text
random_text = ''.join(random.choice('ABCDEFGHIJKLMNOPQRSTUVWXYZ') for _ in range(1024))
@@ -195,3 +229,13 @@ if sys.version_info[:2] != (3, 3):
for line in f:
pass
f.close()
+
+
+def test_bare_read_from_gz():
+ with xopen('tests/hello.gz', 'rt') as f:
+ assert f.read() == 'hello'
+
+
+def test_read_piped_gzip():
+ with PipedGzipReader('tests/hello.gz', 'rt') as f:
+ assert f.read() == 'hello'
diff --git a/tox.ini b/tox.ini
index 43c4de1..d3f5008 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,5 +1,5 @@
[tox]
-envlist = py26,py27,py33,py34,py35
+envlist = py27,py33,py34,py35,py36
[testenv]
deps = nose
diff --git a/xopen.py b/xopen.py
index 114ff16..29cb0c3 100644
--- a/xopen.py
+++ b/xopen.py
@@ -10,13 +10,18 @@ import os
import time
from subprocess import Popen, PIPE
-_PY3 = sys.version > '3'
+__version__ = '0.3.2'
-try:
- import bz2
-except ImportError:
- bz2 = None
+_PY3 = sys.version > '3'
+
+if not _PY3:
+ import bz2file as bz2
+else:
+ try:
+ import bz2
+ except ImportError:
+ bz2 = None
try:
import lzma
@@ -26,29 +31,41 @@ except ImportError:
if _PY3:
basestring = str
-else:
- basestring = basestring
-if sys.version_info < (2, 7):
- buffered_reader = lambda x: x
- buffered_writer = lambda x: x
-else:
- buffered_reader = io.BufferedReader
- buffered_writer = io.BufferedWriter
+class Closing(object):
+ """
+ Inherit from this class and implement a close() method to offer context
+ manager functionality.
+ """
+ def __enter__(self):
+ return self
+
+ def __exit__(self, *exc_info):
+ self.close()
+
+ def __del__(self):
+ try:
+ self.close()
+ except:
+ pass
-class PipedGzipWriter(object):
+class PipedGzipWriter(Closing):
"""
- Write gzip-compressed files by running an external gzip process and piping
- into it. On Python 2, this is faster than using gzip.open. If pigz is
- available, that is used instead of gzip.
+ Write gzip-compressed files by running an external gzip or pigz process and
+ piping into it. On Python 2, this is faster than using gzip.open(). On
+ Python 3, it allows running the compression in a separate process and can
+ therefore also be faster.
"""
- def __init__(self, path, mode='w'):
+ def __init__(self, path, mode='wt'):
+ if mode not in ('w', 'wt', 'wb', 'a', 'at', 'ab'):
+ raise ValueError("Mode is '{0}', but it must be 'w', 'wt', 'wb', 'a', 'at' or 'ab'".format(mode))
self.outfile = open(path, mode)
- self.devnull = open(os.devnull, 'w')
+ self.devnull = open(os.devnull, mode)
self.closed = False
+ self.name = path
# Setting close_fds to True in the Popen arguments is necessary due to
# <http://bugs.python.org/issue12786>.
@@ -57,7 +74,7 @@ class PipedGzipWriter(object):
self.process = Popen(['pigz'], **kwargs)
self.program = 'pigz'
except OSError as e:
- # binary not found, try regular gzip
+ # pigz not found, try regular gzip
try:
self.process = Popen(['gzip'], **kwargs)
self.program = 'gzip'
@@ -69,29 +86,38 @@ class PipedGzipWriter(object):
self.outfile.close()
self.devnull.close()
raise
+ if _PY3 and 'b' not in mode:
+ self._file = io.TextIOWrapper(self.process.stdin)
+ else:
+ self._file = self.process.stdin
def write(self, arg):
- self.process.stdin.write(arg)
+ self._file.write(arg)
def close(self):
self.closed = True
- self.process.stdin.close()
+ self._file.close()
retcode = self.process.wait()
self.outfile.close()
self.devnull.close()
if retcode != 0:
raise IOError("Output {0} process terminated with exit code {1}".format(self.program, retcode))
- def __enter__(self):
- return self
- def __exit__(self, *exc_info):
- self.close()
-
-
-class PipedGzipReader(object):
- def __init__(self, path):
+class PipedGzipReader(Closing):
+ def __init__(self, path, mode='r'):
+ if mode not in ('r', 'rt', 'rb'):
+ raise ValueError("Mode is '{0}', but it must be 'r', 'rt' or 'rb'".format(mode))
self.process = Popen(['gzip', '-cd', path], stdout=PIPE, stderr=PIPE)
+ self.name = path
+ if _PY3 and not 'b' in mode:
+ self._file = io.TextIOWrapper(self.process.stdout)
+ else:
+ self._file = self.process.stdout
+ if _PY3:
+ self._stderr = io.TextIOWrapper(self.process.stderr)
+ else:
+ self._stderr = self.process.stderr
self.closed = False
# Give gzip a little bit of time to report any errors (such as
# a non-existing file)
@@ -107,7 +133,7 @@ class PipedGzipReader(object):
self._raise_if_error()
def __iter__(self):
- for line in self.process.stdout:
+ for line in self._file:
yield line
self.process.wait()
self._raise_if_error()
@@ -119,29 +145,16 @@ class PipedGzipReader(object):
"""
retcode = self.process.poll()
if retcode is not None and retcode != 0:
- message = self.process.stderr.read().strip()
+ message = self._stderr.read().strip()
raise IOError(message)
def read(self, *args):
- data = self.process.stdout.read(*args)
+ data = self._file.read(*args)
if len(args) == 0 or args[0] <= 0:
# wait for process to terminate until we check the exit code
self.process.wait()
self._raise_if_error()
-
- def __enter__(self):
- return self
-
- def __exit__(self, *exc_info):
- self.close()
-
-
-class Closing(object):
- def __enter__(self):
- return self
-
- def __exit__(self, *exc_info):
- self.close()
+ return data
if bz2 is not None:
@@ -152,7 +165,7 @@ if bz2 is not None:
"""
-def xopen(filename, mode='r'):
+def xopen(filename, mode='r', compresslevel=6):
"""
Replacement for the "open" function that can also open files that have
been compressed with gzip, bzip2 or xz. If the filename is '-', standard
@@ -162,18 +175,20 @@ def xopen(filename, mode='r'):
the pipe to the gzip program). If the filename ends with .bz2, it's
opened as a bz2.BZ2File. Otherwise, the regular open() is used.
- mode can be: 'rt', 'rb', 'a', 'wt', or 'wb'
- Instead of 'rt' and 'wt', 'r' and 'w' can be used as abbreviations.
+ mode can be: 'rt', 'rb', 'at', 'ab', 'wt', or 'wb'
+ Instead of 'rt', 'wt' and 'at', 'r', 'w' and 'a' can be used as
+ abbreviations.
In Python 2, the 't' and 'b' characters are ignored.
- Append mode ('a') is unavailable with BZ2 compression and will raise an error.
+ Append mode ('a', 'at', 'ab') is unavailable with BZ2 compression in
+ Python 2 and will raise an error there.
+
+ compresslevel is the gzip compression level. It is not used for bz2 and xz.
"""
- if mode == 'r':
- mode = 'rt'
- elif mode == 'w':
- mode = 'wt'
- if mode not in ('rt', 'rb', 'wt', 'wb', 'a'):
+ if mode in ('r', 'w', 'a'):
+ mode += 't'
+ if mode not in ('rt', 'rb', 'wt', 'wb', 'at', 'ab'):
raise ValueError("mode '{0}' not supported".format(mode))
if not _PY3:
mode = mode[0]
@@ -182,52 +197,53 @@ def xopen(filename, mode='r'):
# standard input and standard output handling
if filename == '-':
- if not _PY3:
- return sys.stdin if 'r' in mode else sys.stdout
return dict(
+ r=sys.stdin,
rt=sys.stdin,
- wt=sys.stdout,
rb=sys.stdin.buffer,
+ w=sys.stdout,
+ wt=sys.stdout,
wb=sys.stdout.buffer)[mode]
if filename.endswith('.bz2'):
if bz2 is None:
raise ImportError("Cannot open bz2 files: The bz2 module is not available")
if _PY3:
- if 't' in mode:
- return io.TextIOWrapper(bz2.BZ2File(filename, mode[0]))
+ return bz2.open(filename, mode)
+ else:
+ if mode[0] == 'a':
+ raise ValueError("mode '{0}' not supported with BZ2 compression".format(mode))
+ if sys.version_info[:2] <= (2, 6):
+ return ClosingBZ2File(filename, mode)
else:
return bz2.BZ2File(filename, mode)
- elif sys.version_info[:2] <= (2, 6):
- return ClosingBZ2File(filename, mode)
- else:
- return bz2.BZ2File(filename, mode)
elif filename.endswith('.xz'):
if lzma is None:
raise ImportError("Cannot open xz files: The lzma module is not available (use Python 3.3 or newer)")
return lzma.open(filename, mode)
elif filename.endswith('.gz'):
- if _PY3:
- if 't' in mode:
- # gzip.open in Python 3.2 does not support modes 'rt' and 'wt''
- return io.TextIOWrapper(gzip.open(filename, mode[0]))
- else:
- if 'r' in mode:
- return io.BufferedReader(gzip.open(filename, mode))
- else:
- return io.BufferedWriter(gzip.open(filename, mode))
+ if _PY3 and 'r' in mode:
+ return gzip.open(filename, mode)
+ if sys.version_info[:2] == (2, 7):
+ buffered_reader = io.BufferedReader
+ buffered_writer = io.BufferedWriter
else:
- # rb/rt are equivalent in Py2
- if 'r' in mode:
- try:
- return PipedGzipReader(filename)
- except OSError:
- # gzip not installed
- return buffered_reader(gzip.open(filename, mode))
- else:
- try:
- return PipedGzipWriter(filename, mode)
- except OSError:
- return buffered_writer(gzip.open(filename, mode))
+ buffered_reader = lambda x: x
+ buffered_writer = lambda x: x
+ if 'r' in mode:
+ try:
+ return PipedGzipReader(filename, mode)
+ except OSError:
+ # gzip not installed
+ return buffered_reader(gzip.open(filename, mode))
+ else:
+ try:
+ return PipedGzipWriter(filename, mode)
+ except OSError:
+ return buffered_writer(gzip.open(filename, mode, compresslevel=compresslevel))
else:
+ # Python 2.6 and 2.7 have io.open, which we could use to make the returned
+ # object consistent with the one returned in Python 3, but reading a file
+ # with io.open() is 100 times slower (!) on Python 2.6, and still about
+ # three times slower on Python 2.7 (tested with "for _ in io.open(path): pass")
return open(filename, mode)
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/python-xopen.git
More information about the debian-med-commit
mailing list