[med-svn] [python-xopen] 01/02: New upstream version 0.1.1

Andreas Tille tille at debian.org
Fri Mar 17 21:10:57 UTC 2017


This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository python-xopen.

commit c2dd3cb016df4a1b64699528a8a2dbf31ba83f49
Author: Andreas Tille <tille at debian.org>
Date:   Fri Mar 17 22:09:42 2017 +0100

    New upstream version 0.1.1
---
 .gitignore         |   6 ++
 .travis.yml        |  18 +++++
 LICENSE            |  19 +++++
 README.rst         |  68 ++++++++++++++++
 setup.cfg          |   2 +
 setup.py           |  31 +++++++
 tests/file.txt     |   2 +
 tests/file.txt.bz2 | Bin 0 -> 71 bytes
 tests/file.txt.gz  | Bin 0 -> 53 bytes
 tests/file.txt.xz  | Bin 0 -> 96 bytes
 tests/testxopen.py | 197 ++++++++++++++++++++++++++++++++++++++++++++
 tox.ini            |   6 ++
 xopen.py           | 233 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 13 files changed, 582 insertions(+)

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..f10536c
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,6 @@
+__pycache__/
+*.pyc
+*.egg-info
+*~
+.tox
+venv/
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..15895bb
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,18 @@
+sudo: false
+language: python
+cache:
+  directories:
+    - $HOME/.cache/pip
+python:
+  - "2.6"
+  - "2.7"
+  - "3.3"
+  - "3.4"
+  - "3.5"
+
+install:
+  - pip install .
+
+script:
+  - nosetests -P tests
+
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..b78f4e8
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,19 @@
+Copyright (c) 2010-2016 Marcel Martin <mail at marcelm.net>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/README.rst b/README.rst
new file mode 100644
index 0000000..248b9dd
--- /dev/null
+++ b/README.rst
@@ -0,0 +1,68 @@
+.. image:: https://travis-ci.org/marcelm/xopen.svg?branch=master
+    :target: https://travis-ci.org/marcelm/xopen
+
+.. image:: https://img.shields.io/pypi/v/xopen.svg?branch=master
+    :target: https://pypi.python.org/pypi/xopen
+
+=====
+xopen
+=====
+
+This small Python module provides a ``xopen`` function that works like the
+built-in ``open`` function, but can also deal with compressed files.
+Supported compression formats are gzip, bzip2 and xz. They are automatically
+recognized by their file extensions ``.gz``, ``.bz2`` or ``.xz``.
+
+The focus is on being as efficient as possible on all supported Python versions.
+For example, simply using ``gzip.open`` is slow in older Pythons, and it is
+a lot faster to use a ``gzip`` subprocess.
+
+This module was originally developed as part of the `cutadapt
+tool <https://cutadapt.readthedocs.io/>`_, which is used in bioinformatics to
+manipulate sequencing data. It has been in successful use within that software
+for a few years.
+
+
+Usage
+-----
+
+Open a file for reading::
+
+    from xopen import xopen
+    with xopen('file.txt.xz') as f:
+        content = f.read()
+
+Or without context manager::
+
+    f = xopen('file.txt.xz')
+    content = f.read()
+    f.close()
+
+Open a file for writing::
+
+    with xopen('file.txt.gz', mode='w') as f:
+        f.write('Hello')
+
+
+Credits
+-------
+
+The name ``xopen`` was taken from the C function of the same name in the
+`utils.h file which is part of BWA <https://github.com/lh3/bwa/blob/83662032a2192d5712996f36069ab02db82acf67/utils.h>`_.
+
+Kyle Beauchamp <https://github.com/kyleabeauchamp/> has contributed support for appending to files.
+
+Some ideas were taken from the `canopener project <https://github.com/selassid/canopener>`_.
+If you also want to open S3 files, you may want to use that module instead.
+
+
+Author
+------
+
+Marcel Martin <mail at marcelm.net> (`@marcelm_ on Twitter <https://twitter.com/marcelm_>`_)
+
+Links
+-----
+
+* `Source code <https://github.com/marcelm/xopen/>`_
+* `Report an issue <https://github.com/marcelm/xopen/issues>`_
+* `Project page on PyPI (Python package index) <https://pypi.python.org/pypi/xopen/>`_
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 0000000..3c6e79c
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,2 @@
+[bdist_wheel]
+universal=1
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..13fccc8
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,31 @@
+import sys
+from setuptools import setup
+
+if sys.version_info < (2, 6):
+	sys.stdout.write("At least Python 2.6 is required.\n")
+	sys.exit(1)
+
+with open('README.rst') as f:
+	long_description = f.read()
+
+setup(
+	name = 'xopen',
+	version = '0.1.1',
+	author = 'Marcel Martin',
+	author_email = 'mail at marcelm.net',
+	url = 'https://github.com/marcelm/xopen/',
+	description = 'Open compressed files transparently',
+	long_description = long_description,
+	license = 'MIT',
+	py_modules = ['xopen'],
+	classifiers = [
+		"Development Status :: 4 - Beta",
+		"License :: OSI Approved :: MIT License",
+		"Programming Language :: Python :: 2.6",
+		"Programming Language :: Python :: 2.7",
+		"Programming Language :: Python :: 3",
+		"Programming Language :: Python :: 3.3",
+		"Programming Language :: Python :: 3.4",
+		"Programming Language :: Python :: 3.5",
+	]
+)
diff --git a/tests/file.txt b/tests/file.txt
new file mode 100644
index 0000000..5338dc8
--- /dev/null
+++ b/tests/file.txt
@@ -0,0 +1,2 @@
+Testing, testing ...
+The second line.
diff --git a/tests/file.txt.bz2 b/tests/file.txt.bz2
new file mode 100644
index 0000000..82a5dcc
Binary files /dev/null and b/tests/file.txt.bz2 differ
diff --git a/tests/file.txt.gz b/tests/file.txt.gz
new file mode 100644
index 0000000..fa8da29
Binary files /dev/null and b/tests/file.txt.gz differ
diff --git a/tests/file.txt.xz b/tests/file.txt.xz
new file mode 100644
index 0000000..9c763e0
Binary files /dev/null and b/tests/file.txt.xz differ
diff --git a/tests/testxopen.py b/tests/testxopen.py
new file mode 100644
index 0000000..c0ba78e
--- /dev/null
+++ b/tests/testxopen.py
@@ -0,0 +1,197 @@
+# coding: utf-8
+from __future__ import print_function, division, absolute_import
+import gzip
+import os
+import random
+import sys
+import signal
+from contextlib import contextmanager
+from nose.tools import raises
+from xopen import xopen
+
+
+base = "tests/file.txt"
+files = [ base + ext for ext in ['', '.gz', '.bz2' ] ]
+try:
+	import lzma
+	files.append(base + '.xz')
+except ImportError:
+	lzma = None
+
+
+major, minor = sys.version_info[0:2]
+
+
+ at contextmanager
+def temporary_path(name):
+	directory = os.path.join(os.path.dirname(__file__), 'testtmp')
+	if not os.path.isdir(directory):
+		os.mkdir(directory)
+	path = os.path.join(directory, name)
+	yield path
+	os.remove(path)
+
+
+def test_xopen_text():
+	for name in files:
+		with xopen(name, 'rt') as f:
+			lines = list(f)
+			assert len(lines) == 2
+			assert lines[1] == 'The second line.\n', name
+
+
+def test_xopen_binary():
+	for name in files:
+		with xopen(name, 'rb') as f:
+			lines = list(f)
+			assert len(lines) == 2
+			assert lines[1] == b'The second line.\n', name
+
+
+def test_no_context_manager_text():
+	for name in files:
+		f = xopen(name, 'rt')
+		lines = list(f)
+		assert len(lines) == 2
+		assert lines[1] == 'The second line.\n', name
+		f.close()
+		assert f.closed
+
+
+def test_no_context_manager_binary():
+	for name in files:
+		f = xopen(name, 'rb')
+		lines = list(f)
+		assert len(lines) == 2
+		assert lines[1] == b'The second line.\n', name
+		f.close()
+		assert f.closed
+
+
+ at raises(IOError)
+def test_nonexisting_file():
+	with xopen('this-file-does-not-exist') as f:
+		pass
+
+
+ at raises(IOError)
+def test_nonexisting_file_gz():
+	with xopen('this-file-does-not-exist.gz') as f:
+		pass
+
+
+ at raises(IOError)
+def test_nonexisting_file_bz2():
+	with xopen('this-file-does-not-exist.bz2') as f:
+		pass
+
+
+if lzma:
+	@raises(IOError)
+	def test_nonexisting_file_xz():
+		with xopen('this-file-does-not-exist.xz') as f:
+			pass
+
+
+ at raises(IOError)
+def test_write_to_nonexisting_dir():
+	with xopen('this/path/does/not/exist/file.txt', 'w') as f:
+		pass
+
+
+ at raises(IOError)
+def test_write_to_nonexisting_dir_gz():
+	with xopen('this/path/does/not/exist/file.gz', 'w') as f:
+		pass
+
+
+ at raises(IOError)
+def test_write_to_nonexisting_dir_bz2():
+	with xopen('this/path/does/not/exist/file.bz2', 'w') as f:
+		pass
+
+
+if lzma:
+	@raises(IOError)
+	def test_write_to_nonexisting_dir():
+		with xopen('this/path/does/not/exist/file.xz', 'w') as f:
+			pass
+
+
+def test_append():
+	for ext in ["", ".gz"]:  # BZ2 does NOT support append
+		text = "AB"
+		if ext != "":
+			text = text.encode("utf-8")  # On Py3, need to send BYTES, not unicode
+		reference = text + text
+		with temporary_path('truncated.fastq' + ext) as path:
+			try:
+				os.unlink(path)
+			except OSError:
+				pass
+			with xopen(path, 'a') as f:
+				f.write(text)
+			with xopen(path, 'a') as f:
+				f.write(text)
+			with xopen(path, 'r') as f:
+				for appended in f:
+					pass
+				try:
+					reference = reference.decode("utf-8")
+				except AttributeError:
+					pass
+				assert appended == reference
+
+
+def create_truncated_file(path):
+	# Random text
+	random_text = ''.join(random.choice('ABCDEFGHIJKLMNOPQRSTUVWXYZ') for _ in range(1024))
+	# Make the text a lot bigger in order to ensure that it is larger than the
+	# pipe buffer size.
+	random_text *= 1024  # 1MB
+	with xopen(path, 'w') as f:
+		f.write(random_text)
+	with open(path, 'a') as f:
+		f.truncate(os.stat(path).st_size - 10)
+
+
+# Raised by timeout.handle_timeout when the alarm signal arrives.
+class TookTooLongError(Exception):
+	pass
+
+
+class timeout:
+	# copied from https://stackoverflow.com/a/22348885/715090
+	def __init__(self, seconds=1):
+		self.seconds = seconds
+
+	def handle_timeout(self, signum, frame):
+		raise TookTooLongError()
+
+	def __enter__(self):
+		signal.signal(signal.SIGALRM, self.handle_timeout)
+		signal.alarm(self.seconds)
+
+	def __exit__(self, type, value, traceback):
+		signal.alarm(0)
+
+
+if sys.version_info[:2] != (3, 3):
+	@raises(EOFError, IOError)
+	def test_truncated_gz():
+		with temporary_path('truncated.gz') as path:
+			create_truncated_file(path)
+			with timeout(seconds=2):
+				f = xopen(path, 'r')
+				f.read()
+				f.close()
+
+
+	@raises(EOFError, IOError)
+	def test_truncated_gz_iter():
+		with temporary_path('truncated.gz') as path:
+			create_truncated_file(path)
+			with timeout(seconds=2):
+				f = xopen(path, 'r')
+				for line in f:
+					pass
+				f.close()
diff --git a/tox.ini b/tox.ini
new file mode 100644
index 0000000..43c4de1
--- /dev/null
+++ b/tox.ini
@@ -0,0 +1,6 @@
+[tox]
+envlist = py26,py27,py33,py34,py35
+
+[testenv]
+deps = nose
+commands = nosetests -P tests
diff --git a/xopen.py b/xopen.py
new file mode 100644
index 0000000..114ff16
--- /dev/null
+++ b/xopen.py
@@ -0,0 +1,233 @@
+"""
+Open compressed files transparently.
+"""
+from __future__ import print_function, division, absolute_import
+
+import gzip
+import sys
+import io
+import os
+import time
+from subprocess import Popen, PIPE
+
+_PY3 = sys.version > '3'
+
+
+try:
+	import bz2
+except ImportError:
+	bz2 = None
+
+try:
+	import lzma
+except ImportError:
+	lzma = None
+
+
+if _PY3:
+	basestring = str
+else:
+	basestring = basestring
+
+
+if sys.version_info < (2, 7):
+	buffered_reader = lambda x: x
+	buffered_writer = lambda x: x
+else:
+	buffered_reader = io.BufferedReader
+	buffered_writer = io.BufferedWriter
+
+
+class PipedGzipWriter(object):
+	"""
+	Write gzip-compressed files by running an external gzip process and piping
+	into it. On Python 2, this is faster than using gzip.open. If pigz is
+	available, that is used instead of gzip.
+
+	The object can be used as a context manager; leaving the with-block
+	calls close().
+	"""
+
+	def __init__(self, path, mode='w'):
+		"""
+		Open `path` with the given mode and start a compressor process that
+		writes its output to that file.
+
+		pigz is tried first. If starting it raises OSError, gzip is tried
+		instead. If that fails as well, the opened files are closed and the
+		exception is re-raised.
+		"""
+		self.outfile = open(path, mode)
+		# The compressor's stderr is discarded.
+		self.devnull = open(os.devnull, 'w')
+		self.closed = False
+
+		# Setting close_fds to True in the Popen arguments is necessary due to
+		# <http://bugs.python.org/issue12786>.
+		kwargs = dict(stdin=PIPE, stdout=self.outfile, stderr=self.devnull, close_fds=True)
+		try:
+			self.process = Popen(['pigz'], **kwargs)
+			self.program = 'pigz'
+		except OSError as e:
+			# binary not found, try regular gzip
+			try:
+				self.process = Popen(['gzip'], **kwargs)
+				self.program = 'gzip'
+			except (IOError, OSError) as e:
+				self.outfile.close()
+				self.devnull.close()
+				raise
+		except IOError as e:
+			self.outfile.close()
+			self.devnull.close()
+			raise
+
+	def write(self, arg):
+		"""Pass `arg` on to the standard input of the compressor process."""
+		self.process.stdin.write(arg)
+
+	def close(self):
+		"""
+		Close the pipe to the compressor, wait for it to exit and close the
+		output file. Raise IOError if the exit code is not zero.
+		"""
+		self.closed = True
+		# Closing stdin signals end of input to the compressor.
+		self.process.stdin.close()
+		retcode = self.process.wait()
+		self.outfile.close()
+		self.devnull.close()
+		if retcode != 0:
+			raise IOError("Output {0} process terminated with exit code {1}".format(self.program, retcode))
+
+	def __enter__(self):
+		return self
+
+	def __exit__(self, *exc_info):
+		self.close()
+
+
+class PipedGzipReader(object):
+	def __init__(self, path):
+		self.process = Popen(['gzip', '-cd', path], stdout=PIPE, stderr=PIPE)
+		self.closed = False
+		# Give gzip a little bit of time to report any errors (such as
+		# a non-existing file)
+		time.sleep(0.01)
+		self._raise_if_error()
+
+	def close(self):
+		self.closed = True
+		retcode = self.process.poll()
+		if retcode is None:
+			# still running
+			self.process.terminate()
+		self._raise_if_error()
+
+	def __iter__(self):
+		for line in self.process.stdout:
+			yield line
+		self.process.wait()
+		self._raise_if_error()
+
+	def _raise_if_error(self):
+		"""
+		Raise IOError if process is not running anymore and the
+		exit code is nonzero.
+		"""
+		retcode = self.process.poll()
+		if retcode is not None and retcode != 0:
+			message = self.process.stderr.read().strip()
+			raise IOError(message)
+
+	def read(self, *args):
+		data = self.process.stdout.read(*args)
+		if len(args) == 0 or args[0] <= 0:
+			# wait for process to terminate until we check the exit code
+			self.process.wait()
+		self._raise_if_error()
+
+	def __enter__(self):
+		return self
+
+	def __exit__(self, *exc_info):
+		self.close()
+
+
+class Closing(object):
+	"""
+	Mixin that adds the context manager protocol to a class that provides
+	a close() method: leaving the with-block calls close().
+	"""
+	def __enter__(self):
+		return self
+
+	def __exit__(self, *exc_info):
+		self.close()
+
+
+# The class can only be defined when the bz2 module could be imported.
+if bz2 is not None:
+	class ClosingBZ2File(bz2.BZ2File, Closing):
+		"""
+		A better BZ2File that supports the context manager protocol.
+		This is relevant only for Python 2.6, where xopen() returns it
+		in place of a plain bz2.BZ2File.
+		"""
+
+
+def xopen(filename, mode='r'):
+	"""
+	Replacement for the "open" function that can also open files that have
+	been compressed with gzip, bzip2 or xz. The format is chosen from the
+	file name extension:
+
+	- If the filename is '-', standard output (mode 'w') or standard input
+	  (mode 'r') is returned.
+	- If the filename ends with .bz2, it is opened as a bz2.BZ2File.
+	- If the filename ends with .xz, it is opened with lzma.open().
+	- If the filename ends with .gz, gzip.open() is used on Python 3. On
+	  Python 2, the file is opened with a pipe to the gzip program. If that
+	  does not work, then gzip.open() is used (the gzip module is slower
+	  than the pipe to the gzip program).
+	- Otherwise, the regular open() is used.
+
+	mode can be: 'rt', 'rb', 'a', 'wt', or 'wb'
+	Instead of 'rt' and 'wt', 'r' and 'w' can be used as abbreviations.
+
+	In Python 2, the 't' and 'b' characters are ignored.
+
+	Append mode ('a') is unavailable with BZ2 compression and will raise an error.
+
+	Raises ValueError if the mode is not one of the above or if the
+	filename is not a string. Raises ImportError if the module needed for
+	a .bz2 or .xz file is not available.
+	"""
+	# Expand the abbreviations so that only the full forms are handled below.
+	if mode == 'r':
+		mode = 'rt'
+	elif mode == 'w':
+		mode = 'wt'
+	if mode not in ('rt', 'rb', 'wt', 'wb', 'a'):
+		raise ValueError("mode '{0}' not supported".format(mode))
+	if not _PY3:
+		# Only 'r', 'w' or 'a' remains on Python 2.
+		mode = mode[0]
+	if not isinstance(filename, basestring):
+		raise ValueError("the filename must be a string")
+
+	# standard input and standard output handling
+	if filename == '-':
+		if not _PY3:
+			return sys.stdin if 'r' in mode else sys.stdout
+		# NOTE(review): there is no entry for mode 'a', so that combination
+		# raises KeyError here - confirm whether this is intended.
+		return dict(
+			rt=sys.stdin,
+			wt=sys.stdout,
+			rb=sys.stdin.buffer,
+			wb=sys.stdout.buffer)[mode]
+
+	if filename.endswith('.bz2'):
+		if bz2 is None:
+			raise ImportError("Cannot open bz2 files: The bz2 module is not available")
+		if _PY3:
+			if 't' in mode:
+				# BZ2File is opened with 'r' or 'w' only and wrapped for text.
+				return io.TextIOWrapper(bz2.BZ2File(filename, mode[0]))
+			else:
+				return bz2.BZ2File(filename, mode)
+		elif sys.version_info[:2] <= (2, 6):
+			# Use the subclass that adds the context manager protocol.
+			return ClosingBZ2File(filename, mode)
+		else:
+			return bz2.BZ2File(filename, mode)
+	elif filename.endswith('.xz'):
+		if lzma is None:
+			raise ImportError("Cannot open xz files: The lzma module is not available (use Python 3.3 or newer)")
+		return lzma.open(filename, mode)
+	elif filename.endswith('.gz'):
+		if _PY3:
+			if 't' in mode:
+				# gzip.open in Python 3.2 does not support modes 'rt' and 'wt'
+				return io.TextIOWrapper(gzip.open(filename, mode[0]))
+			else:
+				if 'r' in mode:
+					return io.BufferedReader(gzip.open(filename, mode))
+				else:
+					return io.BufferedWriter(gzip.open(filename, mode))
+		else:
+			# rb/rt are equivalent in Py2
+			if 'r' in mode:
+				try:
+					return PipedGzipReader(filename)
+				except OSError:
+					# gzip not installed
+					return buffered_reader(gzip.open(filename, mode))
+			else:
+				try:
+					return PipedGzipWriter(filename, mode)
+				except OSError:
+					# no compressor program could be started
+					return buffered_writer(gzip.open(filename, mode))
+	else:
+		return open(filename, mode)

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/python-xopen.git



More information about the debian-med-commit mailing list