[med-svn] [python-xopen] 01/02: New upstream version 0.1.1
Andreas Tille
tille at debian.org
Fri Mar 17 21:10:57 UTC 2017
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository python-xopen.
commit c2dd3cb016df4a1b64699528a8a2dbf31ba83f49
Author: Andreas Tille <tille at debian.org>
Date: Fri Mar 17 22:09:42 2017 +0100
New upstream version 0.1.1
---
.gitignore | 6 ++
.travis.yml | 18 +++++
LICENSE | 19 +++++
README.rst | 68 ++++++++++++++++
setup.cfg | 2 +
setup.py | 31 +++++++
tests/file.txt | 2 +
tests/file.txt.bz2 | Bin 0 -> 71 bytes
tests/file.txt.gz | Bin 0 -> 53 bytes
tests/file.txt.xz | Bin 0 -> 96 bytes
tests/testxopen.py | 197 ++++++++++++++++++++++++++++++++++++++++++++
tox.ini | 6 ++
xopen.py | 233 +++++++++++++++++++++++++++++++++++++++++++++++++++++
13 files changed, 582 insertions(+)
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..f10536c
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,6 @@
+__pycache__/
+*.pyc
+*.egg-info
+*~
+.tox
+venv/
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..15895bb
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,18 @@
+sudo: false
+language: python
+cache:
+ directories:
+ - $HOME/.cache/pip
+python:
+ - "2.6"
+ - "2.7"
+ - "3.3"
+ - "3.4"
+ - "3.5"
+
+install:
+ - pip install .
+
+script:
+ - nosetests -P tests
+
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..b78f4e8
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,19 @@
+Copyright (c) 2010-2016 Marcel Martin <mail at marcelm.net>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/README.rst b/README.rst
new file mode 100644
index 0000000..248b9dd
--- /dev/null
+++ b/README.rst
@@ -0,0 +1,68 @@
+.. image:: https://travis-ci.org/marcelm/xopen.svg?branch=master
+ :target: https://travis-ci.org/marcelm/xopen
+
+.. image:: https://img.shields.io/pypi/v/xopen.svg?branch=master
+ :target: https://pypi.python.org/pypi/xopen
+
+=====
+xopen
+=====
+
+This small Python module provides a ``xopen`` function that works like the
+built-in ``open`` function, but can also deal with compressed files.
+Supported compression formats are gzip, bzip2 and xz. They are automatically
+recognized by their file extensions `.gz`, `.bz2` or `.xz`.
+
+The focus is on being as efficient as possible on all supported Python versions.
+For example, simply using ``gzip.open`` is slow in older Pythons, and it is
+a lot faster to use a ``gzip`` subprocess.
+
+This module has originally been developed as part of the `cutadapt
+tool <https://cutadapt.readthedocs.io/>`_ that is used in bioinformatics to
+manipulate sequencing data. It has been in successful use within that software
+for a few years.
+
+
+Usage
+-----
+
+Open a file for reading::
+
+    from xopen import xopen
+
+    with xopen('file.txt.xz') as f:
+        content = f.read()
+
+Or without context manager::
+
+    from xopen import xopen
+
+    f = xopen('file.txt.xz')
+    content = f.read()
+    f.close()
+
+Open a file for writing::
+
+    from xopen import xopen
+
+    with xopen('file.txt.gz', mode='w') as f:
+        f.write('Hello')
+
+
+Credits
+-------
+
+The name ``xopen`` was taken from the C function of the same name in the
+`utils.h file which is part of BWA <https://github.com/lh3/bwa/blob/83662032a2192d5712996f36069ab02db82acf67/utils.h>`_.
+
+Kyle Beauchamp <https://github.com/kyleabeauchamp/> has contributed support for appending to files.
+
+Some ideas were taken from the `canopener project <https://github.com/selassid/canopener>`_.
+If you also want to open S3 files, you may want to use that module instead.
+
+
+Author
+------
+
+Marcel Martin <mail at marcelm.net> (`@marcelm_ on Twitter <https://twitter.com/marcelm_>`_)
+
+Links
+-----
+
+* `Source code <https://github.com/marcelm/xopen/>`_
+* `Report an issue <https://github.com/marcelm/xopen/issues>`_
+* `Project page on PyPI (Python package index) <https://pypi.python.org/pypi/xopen/>`_
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 0000000..3c6e79c
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,2 @@
+[bdist_wheel]
+universal=1
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..13fccc8
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,31 @@
+# Packaging script for the xopen module (single-file module, setuptools).
+import sys
+from setuptools import setup
+
+# Refuse to run on interpreters older than the oldest supported version.
+if sys.version_info < (2, 6):
+    sys.stdout.write("At least Python 2.6 is required.\n")
+    sys.exit(1)
+
+# The README doubles as the long description of the package.
+with open('README.rst') as f:
+    long_description = f.read()
+
+setup(
+    name = 'xopen',
+    version = '0.1.1',
+    author = 'Marcel Martin',
+    # Must be a real address: the mail archive had rewritten '@' to ' at '.
+    author_email = 'mail@marcelm.net',
+    url = 'https://github.com/marcelm/xopen/',
+    description = 'Open compressed files transparently',
+    long_description = long_description,
+    license = 'MIT',
+    py_modules = ['xopen'],
+    classifiers = [
+        "Development Status :: 4 - Beta",
+        "License :: OSI Approved :: MIT License",
+        "Programming Language :: Python :: 2.6",
+        "Programming Language :: Python :: 2.7",
+        "Programming Language :: Python :: 3",
+        "Programming Language :: Python :: 3.3",
+        "Programming Language :: Python :: 3.4",
+        "Programming Language :: Python :: 3.5",
+    ]
+)
diff --git a/tests/file.txt b/tests/file.txt
new file mode 100644
index 0000000..5338dc8
--- /dev/null
+++ b/tests/file.txt
@@ -0,0 +1,2 @@
+Testing, testing ...
+The second line.
diff --git a/tests/file.txt.bz2 b/tests/file.txt.bz2
new file mode 100644
index 0000000..82a5dcc
Binary files /dev/null and b/tests/file.txt.bz2 differ
diff --git a/tests/file.txt.gz b/tests/file.txt.gz
new file mode 100644
index 0000000..fa8da29
Binary files /dev/null and b/tests/file.txt.gz differ
diff --git a/tests/file.txt.xz b/tests/file.txt.xz
new file mode 100644
index 0000000..9c763e0
Binary files /dev/null and b/tests/file.txt.xz differ
diff --git a/tests/testxopen.py b/tests/testxopen.py
new file mode 100644
index 0000000..c0ba78e
--- /dev/null
+++ b/tests/testxopen.py
@@ -0,0 +1,197 @@
+# coding: utf-8
+from __future__ import print_function, division, absolute_import
+import gzip
+import os
+import random
+import sys
+import signal
+from contextlib import contextmanager
+from nose.tools import raises
+from xopen import xopen
+
+
+# Uncompressed reference file; the compressed variants share its content.
+base = "tests/file.txt"
+files = [base + suffix for suffix in ('', '.gz', '.bz2')]
+# The .xz variant is only exercised when an lzma module can be imported.
+try:
+    import lzma
+except ImportError:
+    lzma = None
+else:
+    files.append(base + '.xz')
+
+
+major, minor = sys.version_info[0:2]
+
+
+@contextmanager
+def temporary_path(name):
+    """
+    Context manager that yields the path of a scratch file called *name*
+    inside a 'testtmp' directory located next to this test module.
+
+    The directory is created on demand. The file itself is not created
+    here; if it exists when the block is left (normally or through an
+    exception), it is removed.
+    """
+    directory = os.path.join(os.path.dirname(__file__), 'testtmp')
+    if not os.path.isdir(directory):
+        os.mkdir(directory)
+    path = os.path.join(directory, name)
+    try:
+        yield path
+    finally:
+        # Clean up even when the test body raises, and do not mask that
+        # exception with an OSError if the file was never created.
+        if os.path.exists(path):
+            os.remove(path)
+
+
+def test_xopen_text():
+    """Each sample file, opened in text mode, yields the two known lines."""
+    for name in files:
+        with xopen(name, 'rt') as handle:
+            content = list(handle)
+            assert len(content) == 2
+            assert content[1] == 'The second line.\n', name
+
+
+def test_xopen_binary():
+    """Each sample file, opened in binary mode, yields the two known lines."""
+    for name in files:
+        with xopen(name, 'rb') as handle:
+            content = list(handle)
+            assert len(content) == 2
+            assert content[1] == b'The second line.\n', name
+
+
+def test_no_context_manager_text():
+    """Text-mode handles work with explicit close() and report .closed."""
+    for name in files:
+        f = xopen(name, 'rt')
+        try:
+            lines = list(f)
+            assert len(lines) == 2
+            assert lines[1] == 'The second line.\n', name
+        finally:
+            # Close even when an assertion fails so no handle is left open.
+            f.close()
+        assert f.closed
+
+
+def test_no_context_manager_binary():
+    """Binary-mode handles work with explicit close() and report .closed."""
+    for name in files:
+        f = xopen(name, 'rb')
+        try:
+            lines = list(f)
+            assert len(lines) == 2
+            assert lines[1] == b'The second line.\n', name
+        finally:
+            # Close even when an assertion fails so no handle is left open.
+            f.close()
+        assert f.closed
+
+
+@raises(IOError)
+def test_nonexisting_file():
+    """Opening a missing plain file must raise IOError."""
+    with xopen('this-file-does-not-exist'):
+        pass
+
+
+@raises(IOError)
+def test_nonexisting_file_gz():
+    """Opening a missing .gz file must raise IOError."""
+    with xopen('this-file-does-not-exist.gz'):
+        pass
+
+
+@raises(IOError)
+def test_nonexisting_file_bz2():
+    """Opening a missing .bz2 file must raise IOError."""
+    with xopen('this-file-does-not-exist.bz2'):
+        pass
+
+
+# Only defined when an lzma module could be imported.
+if lzma:
+    @raises(IOError)
+    def test_nonexisting_file_xz():
+        """Opening a missing .xz file must raise IOError."""
+        with xopen('this-file-does-not-exist.xz'):
+            pass
+
+
+@raises(IOError)
+def test_write_to_nonexisting_dir():
+    """Writing a plain file into a missing directory must raise IOError."""
+    with xopen('this/path/does/not/exist/file.txt', 'w'):
+        pass
+
+
+@raises(IOError)
+def test_write_to_nonexisting_dir_gz():
+    """Writing a .gz file into a missing directory must raise IOError."""
+    with xopen('this/path/does/not/exist/file.gz', 'w'):
+        pass
+
+
+@raises(IOError)
+def test_write_to_nonexisting_dir_bz2():
+    """Writing a .bz2 file into a missing directory must raise IOError."""
+    with xopen('this/path/does/not/exist/file.bz2', 'w'):
+        pass
+
+
+# Only defined when an lzma module could be imported.
+if lzma:
+    # Named with an _xz suffix so that it does not replace the plain-file
+    # test_write_to_nonexisting_dir defined earlier in this module.
+    @raises(IOError)
+    def test_write_to_nonexisting_dir_xz():
+        """Writing a .xz file into a missing directory must raise IOError."""
+        with xopen('this/path/does/not/exist/file.xz', 'w'):
+            pass
+
+
+def test_append():
+    """Appending twice to the same file yields the concatenated content."""
+    # BZ2 does NOT support append, so only plain and gzip files are tried.
+    for ext in ("", ".gz"):
+        text = "AB"
+        if ext != "":
+            # On Py3, need to send BYTES, not unicode
+            text = text.encode("utf-8")
+        reference = text + text
+        with temporary_path('truncated.fastq' + ext) as path:
+            # Start from a clean slate; a missing file is fine.
+            try:
+                os.unlink(path)
+            except OSError:
+                pass
+            for _ in range(2):
+                with xopen(path, 'a') as f:
+                    f.write(text)
+            with xopen(path, 'r') as f:
+                # Keep only the last line that the reader produces.
+                for appended in f:
+                    pass
+                # Reading happens in text mode, so compare against text
+                # whenever the reference has a decode() method.
+                try:
+                    reference = reference.decode("utf-8")
+                except AttributeError:
+                    pass
+                assert appended == reference
+
+
+def create_truncated_file(path):
+    """
+    Write about 1 MB of random letters to *path* through xopen, then cut
+    the last 10 bytes off the resulting file.
+    """
+    alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+    block = ''.join(random.choice(alphabet) for _ in range(1024))
+    # Repeat the block so the text is larger than the pipe buffer size.
+    payload = block * 1024  # 1MB
+    with xopen(path, 'w') as out:
+        out.write(payload)
+    shortened_size = os.stat(path).st_size - 10
+    with open(path, 'a') as raw:
+        raw.truncate(shortened_size)
+
+
+class TookTooLongError(Exception):
+    """Raised by the timeout helper's alarm handler."""
+
+
+class timeout(object):
+    """
+    Context manager that raises TookTooLongError if its block runs longer
+    than *seconds* (whole seconds, delivered through SIGALRM).
+
+    The SIGALRM handler that was active on entry is put back on exit.
+    """
+    # copied from https://stackoverflow.com/a/22348885/715090
+    def __init__(self, seconds=1):
+        self.seconds = seconds
+        self._previous_handler = None
+
+    def handle_timeout(self, signum, frame):
+        raise TookTooLongError()
+
+    def __enter__(self):
+        # signal.signal() returns the handler that was installed before.
+        self._previous_handler = signal.signal(signal.SIGALRM, self.handle_timeout)
+        signal.alarm(self.seconds)
+
+    def __exit__(self, type, value, traceback):
+        # Cancel the pending alarm first, then restore the old handler.
+        signal.alarm(0)
+        # None means the old handler was not installed from Python and
+        # cannot be passed back to signal.signal().
+        if self._previous_handler is not None:
+            signal.signal(signal.SIGALRM, self._previous_handler)
+
+
+# These tests are not defined on Python 3.3.
+if sys.version_info[:2] != (3, 3):
+    @raises(EOFError, IOError)
+    def test_truncated_gz():
+        """Reading a truncated .gz file in one go must fail, not hang."""
+        with temporary_path('truncated.gz') as path:
+            create_truncated_file(path)
+            with timeout(seconds=2):
+                f = xopen(path, 'r')
+                try:
+                    f.read()
+                finally:
+                    # The read is expected to raise; close regardless so
+                    # the handle does not stay open.
+                    f.close()
+
+
+    @raises(EOFError, IOError)
+    def test_truncated_gz_iter():
+        """Iterating over a truncated .gz file must fail, not hang."""
+        with temporary_path('truncated.gz') as path:
+            create_truncated_file(path)
+            with timeout(seconds=2):
+                f = xopen(path, 'r')
+                try:
+                    for line in f:
+                        pass
+                finally:
+                    # The iteration is expected to raise; close regardless.
+                    f.close()
diff --git a/tox.ini b/tox.ini
new file mode 100644
index 0000000..43c4de1
--- /dev/null
+++ b/tox.ini
@@ -0,0 +1,6 @@
+[tox]
+envlist = py26,py27,py33,py34,py35
+
+[testenv]
+deps = nose
+commands = nosetests -P tests
diff --git a/xopen.py b/xopen.py
new file mode 100644
index 0000000..114ff16
--- /dev/null
+++ b/xopen.py
@@ -0,0 +1,233 @@
+"""
+Open compressed files transparently.
+"""
+from __future__ import print_function, division, absolute_import
+
+import gzip
+import sys
+import io
+import os
+import time
+from subprocess import Popen, PIPE
+
+# Compare the numeric major version. Comparing sys.version as a string
+# against '3' would misorder a major version of 10 or higher.
+_PY3 = sys.version_info[0] >= 3
+
+
+# bz2 and lzma are optional; the names are set to None when unavailable.
+try:
+    import bz2
+except ImportError:
+    bz2 = None
+
+try:
+    import lzma
+except ImportError:
+    lzma = None
+
+
+# Provide a module-level 'basestring' name on both major versions.
+if _PY3:
+    basestring = str
+else:
+    basestring = basestring
+
+
+# Before Python 2.7 the gzip file objects are used without an io buffer
+# wrapper; from 2.7 on they are wrapped in io.BufferedReader/Writer.
+if sys.version_info < (2, 7):
+    buffered_reader = lambda x: x
+    buffered_writer = lambda x: x
+else:
+    buffered_reader = io.BufferedReader
+    buffered_writer = io.BufferedWriter
+
+
+class PipedGzipWriter(object):
+    """
+    Write gzip-compressed files by running an external gzip process and piping
+    into it. On Python 2, this is faster than using gzip.open. If pigz is
+    available, that is used instead of gzip.
+
+    Raises OSError/IOError from the constructor when the output file cannot
+    be opened or neither program can be started.
+    """
+
+    def __init__(self, path, mode='w'):
+        self.outfile = open(path, mode)
+        try:
+            self.devnull = open(os.devnull, 'w')
+        except (IOError, OSError):
+            # Do not leave the output file open when devnull is unavailable.
+            self.outfile.close()
+            raise
+        self.closed = False
+
+        # Setting close_fds to True in the Popen arguments is necessary due to
+        # <http://bugs.python.org/issue12786>.
+        kwargs = dict(stdin=PIPE, stdout=self.outfile, stderr=self.devnull, close_fds=True)
+        try:
+            try:
+                self.process = Popen(['pigz'], **kwargs)
+                self.program = 'pigz'
+            except OSError:
+                # binary not found, try regular gzip
+                self.process = Popen(['gzip'], **kwargs)
+                self.program = 'gzip'
+        except (IOError, OSError):
+            # Neither program could be started: release both files and let
+            # the caller see the error.
+            self.outfile.close()
+            self.devnull.close()
+            raise
+
+    def write(self, arg):
+        """Send *arg* to the compressor's standard input."""
+        self.process.stdin.write(arg)
+
+    def close(self):
+        """
+        Finish the stream: close the pipe, wait for the compressor and close
+        the output file. Calling close() again is a no-op.
+
+        Raises IOError if the compressor exits with a nonzero code.
+        """
+        if self.closed:
+            return
+        self.closed = True
+        try:
+            self.process.stdin.close()
+            retcode = self.process.wait()
+        finally:
+            # Release the files even if closing the pipe or waiting fails.
+            self.outfile.close()
+            self.devnull.close()
+        if retcode != 0:
+            raise IOError("Output {0} process terminated with exit code {1}".format(self.program, retcode))
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, *exc_info):
+        self.close()
+
+
+class PipedGzipReader(object):
+    """
+    Read a gzip-compressed file by running an external ``gzip -cd`` process
+    and reading from its standard output. Data is handed on exactly as the
+    pipe delivers it.
+
+    Raises OSError from the constructor if gzip cannot be started and
+    IOError (carrying gzip's stderr output) if gzip exits with an error.
+    """
+
+    def __init__(self, path):
+        self.process = Popen(['gzip', '-cd', path], stdout=PIPE, stderr=PIPE)
+        self.closed = False
+        # Give gzip a little bit of time to report any errors (such as
+        # a non-existing file)
+        time.sleep(0.01)
+        try:
+            self._raise_if_error()
+        except IOError:
+            self._close_pipes()
+            raise
+
+    def close(self):
+        """
+        Stop the gzip process if it is still running and release its pipes.
+        Calling close() again is a no-op.
+
+        Raises IOError if gzip had already exited with a nonzero code.
+        """
+        if self.closed:
+            return
+        self.closed = True
+        try:
+            if self.process.poll() is None:
+                # still running: the caller stopped reading early. Stop gzip
+                # and reap it; the exit status caused by our own terminate()
+                # is not an error and must not be reported as one.
+                self.process.terminate()
+                self.process.wait()
+            else:
+                self._raise_if_error()
+        finally:
+            self._close_pipes()
+
+    def __iter__(self):
+        for line in self.process.stdout:
+            yield line
+        self.process.wait()
+        self._raise_if_error()
+
+    def _close_pipes(self):
+        """Close the stdout and stderr pipes of the gzip process."""
+        self.process.stdout.close()
+        self.process.stderr.close()
+
+    def _raise_if_error(self):
+        """
+        Raise IOError if process is not running anymore and the
+        exit code is nonzero.
+        """
+        retcode = self.process.poll()
+        if retcode is not None and retcode != 0:
+            message = self.process.stderr.read().strip()
+            raise IOError(message)
+
+    def read(self, *args):
+        """
+        Read from the decompressed stream; arguments are passed through to
+        the pipe's read(). Returns the data that was read.
+        """
+        data = self.process.stdout.read(*args)
+        if len(args) == 0 or args[0] <= 0:
+            # wait for process to terminate until we check the exit code
+            self.process.wait()
+        self._raise_if_error()
+        return data
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, *exc_info):
+        self.close()
+
+
+class Closing(object):
+    """
+    Mixin providing the context manager protocol for classes that have a
+    close() method: the object itself is bound by ``with`` and close() is
+    called when the block is left.
+    """
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, *_exc_details):
+        self.close()
+
+
+# Only defined when the bz2 module could be imported.
+if bz2 is not None:
+    class ClosingBZ2File(bz2.BZ2File, Closing):
+        """
+        BZ2File variant that can be used in a ``with`` statement.
+        This is relevant only for Python 2.6.
+        """
+
+
+def xopen(filename, mode='r'):
+    """
+    Replacement for the "open" function that can also open files that have
+    been compressed with gzip, bzip2 or xz.
+
+    The compression format is chosen from the filename extension:
+
+    - '-': standard input (read modes) or standard output (all other
+      modes) is returned; on Python 3, binary modes return the underlying
+      ``.buffer`` object.
+    - '.bz2': opened with bz2.BZ2File.
+    - '.xz': opened with lzma.open().
+    - '.gz': on Python 3, opened with gzip.open(). On Python 2, opened
+      with a pipe to the gzip (or pigz, for writing) program; if that
+      program cannot be started, gzip.open() is used instead (the gzip
+      module is slower than the pipe to the gzip program).
+    - anything else: the regular open() is used.
+
+    mode can be: 'rt', 'rb', 'a', 'wt', or 'wb'
+    Instead of 'rt' and 'wt', 'r' and 'w' can be used as abbreviations.
+
+    In Python 2, the 't' and 'b' characters are ignored.
+
+    Append mode ('a') is unavailable with BZ2 compression and will raise an error.
+
+    Raises ValueError for an unsupported mode or a non-string filename and
+    ImportError when the module needed for a .bz2 or .xz file is missing.
+    """
+    if mode == 'r':
+        mode = 'rt'
+    elif mode == 'w':
+        mode = 'wt'
+    if mode not in ('rt', 'rb', 'wt', 'wb', 'a'):
+        raise ValueError("mode '{0}' not supported".format(mode))
+    if not _PY3:
+        mode = mode[0]
+    if not isinstance(filename, basestring):
+        raise ValueError("the filename must be a string")
+
+    # standard input and standard output handling
+    if filename == '-':
+        # Pick the stream first and only then look at .buffer, so that a
+        # replaced sys.stdin/sys.stdout without a .buffer attribute does not
+        # break the modes that never needed it. Mode 'a' goes to stdout.
+        stream = sys.stdin if 'r' in mode else sys.stdout
+        if _PY3 and 'b' in mode:
+            return stream.buffer
+        return stream
+
+    if filename.endswith('.bz2'):
+        if bz2 is None:
+            raise ImportError("Cannot open bz2 files: The bz2 module is not available")
+        if _PY3:
+            if 't' in mode:
+                return io.TextIOWrapper(bz2.BZ2File(filename, mode[0]))
+            else:
+                return bz2.BZ2File(filename, mode)
+        elif sys.version_info[:2] <= (2, 6):
+            # BZ2File has no context manager support on Python 2.6.
+            return ClosingBZ2File(filename, mode)
+        else:
+            return bz2.BZ2File(filename, mode)
+    elif filename.endswith('.xz'):
+        if lzma is None:
+            raise ImportError("Cannot open xz files: The lzma module is not available (use Python 3.3 or newer)")
+        return lzma.open(filename, mode)
+    elif filename.endswith('.gz'):
+        if _PY3:
+            if 't' in mode:
+                # gzip.open in Python 3.2 does not support modes 'rt' and 'wt''
+                return io.TextIOWrapper(gzip.open(filename, mode[0]))
+            else:
+                if 'r' in mode:
+                    return io.BufferedReader(gzip.open(filename, mode))
+                else:
+                    return io.BufferedWriter(gzip.open(filename, mode))
+        else:
+            # rb/rt are equivalent in Py2
+            if 'r' in mode:
+                try:
+                    return PipedGzipReader(filename)
+                except OSError:
+                    # gzip not installed
+                    return buffered_reader(gzip.open(filename, mode))
+            else:
+                try:
+                    return PipedGzipWriter(filename, mode)
+                except OSError:
+                    return buffered_writer(gzip.open(filename, mode))
+    else:
+        return open(filename, mode)
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/python-xopen.git
More information about the debian-med-commit
mailing list