[med-svn] [python-xopen] 01/06: New upstream version 0.3.2

Andreas Tille tille at debian.org
Sat Feb 10 12:35:01 UTC 2018


This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository python-xopen.

commit 1cbdbf4f69dfb0e144d6e8836044557758ded3e3
Author: Andreas Tille <tille at debian.org>
Date:   Sat Feb 10 13:27:16 2018 +0100

    New upstream version 0.3.2
---
 .travis.yml                           |   3 +-
 README.rst                            |  21 ++--
 setup.cfg                             |   2 -
 setup.py                              |  32 +++---
 tests/file.txt.bz2                    | Bin 71 -> 118 bytes
 tests/hello.gz                        | Bin 0 -> 25 bytes
 tests/{testxopen.py => test_xopen.py} |  58 +++++++++--
 tox.ini                               |   2 +-
 xopen.py                              | 188 ++++++++++++++++++----------------
 9 files changed, 189 insertions(+), 117 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 15895bb..311b5ae 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -4,15 +4,14 @@ cache:
   directories:
     - $HOME/.cache/pip
 python:
-  - "2.6"
   - "2.7"
   - "3.3"
   - "3.4"
   - "3.5"
+  - "3.6"
 
 install:
   - pip install .
 
 script:
   - nosetests -P tests
-
diff --git a/README.rst b/README.rst
index 248b9dd..96a4164 100644
--- a/README.rst
+++ b/README.rst
@@ -8,38 +8,47 @@
 xopen
 =====
 
-This small Python module provides a ``xopen`` function that works like the
+This small Python module provides an ``xopen`` function that works like the
 built-in ``open`` function, but can also deal with compressed files.
 Supported compression formats are gzip, bzip2 and xz. They are automatically
 recognized by their file extensions `.gz`, `.bz2` or `.xz`.
 
 The focus is on being as efficient as possible on all supported Python versions.
-For example, simply using ``gzip.open`` is slow in older Pythons, and it is
-a lot faster to use a ``gzip`` subprocess.
+For example, simply using ``gzip.open`` is very slow in older Pythons, and
+it is a lot faster to use a ``gzip`` subprocess. For writing to gzip files,
+``xopen`` uses ``pigz`` when available.
 
 This module has originally been developed as part of the `cutadapt
 tool <https://cutadapt.readthedocs.io/>`_ that is used in bioinformatics to
 manipulate sequencing data. It has been in successful use within that software
 for a few years.
 
+``xopen`` is compatible with Python 2.7, 3.3, 3.4, 3.5 and 3.6.
+
 
 Usage
 -----
 
 Open a file for reading::
 
-    with open('file.txt.xz') as f:
+    from xopen import xopen
+
+    with xopen('file.txt.xz') as f:
         content = f.read()
 
 Or without context manager::
 
-    f = open('file.txt.xz')
+    from xopen import xopen
+
+    f = xopen('file.txt.xz')
     content = f.read()
     f.close()
 
 Open a file for writing::
 
-    with open('file.txt.gz', mode='w') as f:
+    from xopen import xopen
+
+    with xopen('file.txt.gz', mode='w') as f:
         f.write('Hello')
 
 
diff --git a/setup.cfg b/setup.cfg
deleted file mode 100644
index 3c6e79c..0000000
--- a/setup.cfg
+++ /dev/null
@@ -1,2 +0,0 @@
-[bdist_wheel]
-universal=1
diff --git a/setup.py b/setup.py
index 13fccc8..ea3ddf1 100644
--- a/setup.py
+++ b/setup.py
@@ -1,31 +1,37 @@
 import sys
 from setuptools import setup
 
-if sys.version_info < (2, 6):
-	sys.stdout.write("At least Python 2.6 is required.\n")
+if sys.version_info < (2, 7):
+	sys.stdout.write("At least Python 2.7 is required.\n")
 	sys.exit(1)
 
 with open('README.rst') as f:
 	long_description = f.read()
 
+if sys.version_info < (3, ):
+	requires = ['bz2file']
+else:
+	requires = []
+
 setup(
-	name = 'xopen',
-	version = '0.1.1',
-	author = 'Marcel Martin',
-	author_email = 'mail at marcelm.net',
-	url = 'https://github.com/marcelm/xopen/',
-	description = 'Open compressed files transparently',
-	long_description = long_description,
-	license = 'MIT',
-	py_modules = ['xopen'],
-	classifiers = [
+	name='xopen',
+	version='0.3.2',
+	author='Marcel Martin',
+	author_email='mail at marcelm.net',
+	url='https://github.com/marcelm/xopen/',
+	description='Open compressed files transparently',
+	long_description=long_description,
+	license='MIT',
+	py_modules=['xopen'],
+	install_requires=requires,
+	classifiers=[
 		"Development Status :: 4 - Beta",
 		"License :: OSI Approved :: MIT License",
-		"Programming Language :: Python :: 2.6",
 		"Programming Language :: Python :: 2.7",
 		"Programming Language :: Python :: 3",
 		"Programming Language :: Python :: 3.3",
 		"Programming Language :: Python :: 3.4",
 		"Programming Language :: Python :: 3.5",
+		"Programming Language :: Python :: 3.6",
 	]
 )
diff --git a/tests/file.txt.bz2 b/tests/file.txt.bz2
index 82a5dcc..defbf7d 100644
Binary files a/tests/file.txt.bz2 and b/tests/file.txt.bz2 differ
diff --git a/tests/hello.gz b/tests/hello.gz
new file mode 100644
index 0000000..73227c4
Binary files /dev/null and b/tests/hello.gz differ
diff --git a/tests/testxopen.py b/tests/test_xopen.py
similarity index 76%
rename from tests/testxopen.py
rename to tests/test_xopen.py
index c0ba78e..ba04eee 100644
--- a/tests/testxopen.py
+++ b/tests/test_xopen.py
@@ -7,7 +7,7 @@ import sys
 import signal
 from contextlib import contextmanager
 from nose.tools import raises
-from xopen import xopen
+from xopen import xopen, PipedGzipReader
 
 
 base = "tests/file.txt"
@@ -18,6 +18,10 @@ try:
 except ImportError:
 	lzma = None
 
+try:
+	import bz2
+except ImportError:
+	bz2 = None
 
 major, minor = sys.version_info[0:2]
 
@@ -119,19 +123,24 @@ if lzma:
 
 
 def test_append():
-	for ext in ["", ".gz"]:  # BZ2 does NOT support append
-		text = "AB"
-		if ext != "":
-			text = text.encode("utf-8")  # On Py3, need to send BYTES, not unicode
+	cases = ["", ".gz"]
+	if bz2 and sys.version_info > (3,):
+		# BZ2 does NOT support append in Py 2.
+		cases.append(".bz2")
+	if lzma:
+		cases.append(".xz")
+	for ext in cases:
+		# On Py3, need to send BYTES, not unicode. Let's do it for all.
+		text = "AB".encode("utf-8")
 		reference = text + text
 		with temporary_path('truncated.fastq' + ext) as path:
 			try:
 				os.unlink(path)
 			except OSError:
 				pass
-			with xopen(path, 'a') as f:
+			with xopen(path, 'ab') as f:
 				f.write(text)
-			with xopen(path, 'a') as f:
+			with xopen(path, 'ab') as f:
 				f.write(text)
 			with xopen(path, 'r') as f:
 				for appended in f:
@@ -143,6 +152,31 @@ def test_append():
 				assert appended == reference
 
 
+def test_append_text():
+	cases = ["", ".gz"]
+	if bz2 and sys.version_info > (3,):
+		# BZ2 does NOT support append in Py 2.
+		cases.append(".bz2")
+	if lzma:
+		cases.append(".xz")
+	for ext in cases:  # BZ2 does NOT support append
+		text = "AB"
+		reference = text + text
+		with temporary_path('truncated.fastq' + ext) as path:
+			try:
+				os.unlink(path)
+			except OSError:
+				pass
+			with xopen(path, 'at') as f:
+				f.write(text)
+			with xopen(path, 'at') as f:
+				f.write(text)
+			with xopen(path, 'rt') as f:
+				for appended in f:
+					pass
+				assert appended == reference
+
+
 def create_truncated_file(path):
 	# Random text
 	random_text = ''.join(random.choice('ABCDEFGHIJKLMNOPQRSTUVWXYZ') for _ in range(1024))
@@ -195,3 +229,13 @@ if sys.version_info[:2] != (3, 3):
 				for line in f:
 					pass
 				f.close()
+
+
+def test_bare_read_from_gz():
+	with xopen('tests/hello.gz', 'rt') as f:
+		assert f.read() == 'hello'
+
+
+def test_read_piped_gzip():
+	with PipedGzipReader('tests/hello.gz', 'rt') as f:
+		assert f.read() == 'hello'
diff --git a/tox.ini b/tox.ini
index 43c4de1..d3f5008 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,5 +1,5 @@
 [tox]
-envlist = py26,py27,py33,py34,py35
+envlist = py27,py33,py34,py35,py36
 
 [testenv]
 deps = nose
diff --git a/xopen.py b/xopen.py
index 114ff16..29cb0c3 100644
--- a/xopen.py
+++ b/xopen.py
@@ -10,13 +10,18 @@ import os
 import time
 from subprocess import Popen, PIPE
 
-_PY3 = sys.version > '3'
+__version__ = '0.3.2'
 
 
-try:
-	import bz2
-except ImportError:
-	bz2 = None
+_PY3 = sys.version > '3'
+
+if not _PY3:
+	import bz2file as bz2
+else:
+	try:
+		import bz2
+	except ImportError:
+		bz2 = None
 
 try:
 	import lzma
@@ -26,29 +31,41 @@ except ImportError:
 
 if _PY3:
 	basestring = str
-else:
-	basestring = basestring
 
 
-if sys.version_info < (2, 7):
-	buffered_reader = lambda x: x
-	buffered_writer = lambda x: x
-else:
-	buffered_reader = io.BufferedReader
-	buffered_writer = io.BufferedWriter
+class Closing(object):
+	"""
+	Inherit from this class and implement a close() method to offer context
+	manager functionality.
+	"""
+	def __enter__(self):
+		return self
+
+	def __exit__(self, *exc_info):
+		self.close()
+
+	def __del__(self):
+		try:
+			self.close()
+		except:
+			pass
 
 
-class PipedGzipWriter(object):
+class PipedGzipWriter(Closing):
 	"""
-	Write gzip-compressed files by running an external gzip process and piping
-	into it. On Python 2, this is faster than using gzip.open. If pigz is
-	available, that is used instead of gzip.
+	Write gzip-compressed files by running an external gzip or pigz process and
+	piping into it. On Python 2, this is faster than using gzip.open(). On
+	Python 3, it allows to run the compression in a separate process and can
+	therefore also be faster.
 	"""
 
-	def __init__(self, path, mode='w'):
+	def __init__(self, path, mode='wt'):
+		if mode not in ('w', 'wt', 'wb', 'a', 'at', 'ab'):
+			raise ValueError("Mode is '{0}', but it must be 'w', 'wt', 'wb', 'a', 'at' or 'ab'".format(mode))
 		self.outfile = open(path, mode)
-		self.devnull = open(os.devnull, 'w')
+		self.devnull = open(os.devnull, mode)
 		self.closed = False
+		self.name = path
 
 		# Setting close_fds to True in the Popen arguments is necessary due to
 		# <http://bugs.python.org/issue12786>.
@@ -57,7 +74,7 @@ class PipedGzipWriter(object):
 			self.process = Popen(['pigz'], **kwargs)
 			self.program = 'pigz'
 		except OSError as e:
-			# binary not found, try regular gzip
+			# pigz not found, try regular gzip
 			try:
 				self.process = Popen(['gzip'], **kwargs)
 				self.program = 'gzip'
@@ -69,29 +86,38 @@ class PipedGzipWriter(object):
 			self.outfile.close()
 			self.devnull.close()
 			raise
+		if _PY3 and 'b' not in mode:
+			self._file = io.TextIOWrapper(self.process.stdin)
+		else:
+			self._file = self.process.stdin
 
 	def write(self, arg):
-		self.process.stdin.write(arg)
+		self._file.write(arg)
 
 	def close(self):
 		self.closed = True
-		self.process.stdin.close()
+		self._file.close()
 		retcode = self.process.wait()
 		self.outfile.close()
 		self.devnull.close()
 		if retcode != 0:
 			raise IOError("Output {0} process terminated with exit code {1}".format(self.program, retcode))
 
-	def __enter__(self):
-		return self
 
-	def __exit__(self, *exc_info):
-		self.close()
-
-
-class PipedGzipReader(object):
-	def __init__(self, path):
+class PipedGzipReader(Closing):
+	def __init__(self, path, mode='r'):
+		if mode not in ('r', 'rt', 'rb'):
+			raise ValueError("Mode is '{0}', but it must be 'r', 'rt' or 'rb'".format(mode))
 		self.process = Popen(['gzip', '-cd', path], stdout=PIPE, stderr=PIPE)
+		self.name = path
+		if _PY3 and not 'b' in mode:
+			self._file = io.TextIOWrapper(self.process.stdout)
+		else:
+			self._file = self.process.stdout
+		if _PY3:
+			self._stderr = io.TextIOWrapper(self.process.stderr)
+		else:
+			self._stderr = self.process.stderr
 		self.closed = False
 		# Give gzip a little bit of time to report any errors (such as
 		# a non-existing file)
@@ -107,7 +133,7 @@ class PipedGzipReader(object):
 		self._raise_if_error()
 
 	def __iter__(self):
-		for line in self.process.stdout:
+		for line in self._file:
 			yield line
 		self.process.wait()
 		self._raise_if_error()
@@ -119,29 +145,16 @@ class PipedGzipReader(object):
 		"""
 		retcode = self.process.poll()
 		if retcode is not None and retcode != 0:
-			message = self.process.stderr.read().strip()
+			message = self._stderr.read().strip()
 			raise IOError(message)
 
 	def read(self, *args):
-		data = self.process.stdout.read(*args)
+		data = self._file.read(*args)
 		if len(args) == 0 or args[0] <= 0:
 			# wait for process to terminate until we check the exit code
 			self.process.wait()
 		self._raise_if_error()
-
-	def __enter__(self):
-		return self
-
-	def __exit__(self, *exc_info):
-		self.close()
-
-
-class Closing(object):
-	def __enter__(self):
-		return self
-
-	def __exit__(self, *exc_info):
-		self.close()
+		return data
 
 
 if bz2 is not None:
@@ -152,7 +165,7 @@ if bz2 is not None:
 		"""
 
 
-def xopen(filename, mode='r'):
+def xopen(filename, mode='r', compresslevel=6):
 	"""
 	Replacement for the "open" function that can also open files that have
 	been compressed with gzip, bzip2 or xz. If the filename is '-', standard
@@ -162,18 +175,20 @@ def xopen(filename, mode='r'):
 	the pipe to the gzip program). If the filename ends with .bz2, it's
 	opened as a bz2.BZ2File. Otherwise, the regular open() is used.
 
-	mode can be: 'rt', 'rb', 'a', 'wt', or 'wb'
-	Instead of 'rt' and 'wt', 'r' and 'w' can be used as abbreviations.
+	mode can be: 'rt', 'rb', 'at', 'ab', 'wt', or 'wb'
+	Instead of 'rt', 'wt' and 'at', 'r', 'w' and 'a' can be used as
+	abbreviations.
 
 	In Python 2, the 't' and 'b' characters are ignored.
 
-	Append mode ('a') is unavailable with BZ2 compression and will raise an error.
+	Append mode ('a', 'at', 'ab') is unavailable with BZ2 compression and
+	will raise an error.
+
+	compresslevel is the gzip compression level. It is not used for bz2 and xz.
 	"""
-	if mode == 'r':
-		mode = 'rt'
-	elif mode == 'w':
-		mode = 'wt'
-	if mode not in ('rt', 'rb', 'wt', 'wb', 'a'):
+	if mode in ('r', 'w', 'a'):
+		mode += 't'
+	if mode not in ('rt', 'rb', 'wt', 'wb', 'at', 'ab'):
 		raise ValueError("mode '{0}' not supported".format(mode))
 	if not _PY3:
 		mode = mode[0]
@@ -182,52 +197,53 @@ def xopen(filename, mode='r'):
 
 	# standard input and standard output handling
 	if filename == '-':
-		if not _PY3:
-			return sys.stdin if 'r' in mode else sys.stdout
 		return dict(
+			r=sys.stdin,
 			rt=sys.stdin,
-			wt=sys.stdout,
 			rb=sys.stdin.buffer,
+			w=sys.stdout,
+			wt=sys.stdout,
 			wb=sys.stdout.buffer)[mode]
 
 	if filename.endswith('.bz2'):
 		if bz2 is None:
 			raise ImportError("Cannot open bz2 files: The bz2 module is not available")
 		if _PY3:
-			if 't' in mode:
-				return io.TextIOWrapper(bz2.BZ2File(filename, mode[0]))
+			return bz2.open(filename, mode)
+		else:
+			if mode[0] == 'a':
+				raise ValueError("mode '{0}' not supported with BZ2 compression".format(mode))
+			if sys.version_info[:2] <= (2, 6):
+				return ClosingBZ2File(filename, mode)
 			else:
 				return bz2.BZ2File(filename, mode)
-		elif sys.version_info[:2] <= (2, 6):
-			return ClosingBZ2File(filename, mode)
-		else:
-			return bz2.BZ2File(filename, mode)
 	elif filename.endswith('.xz'):
 		if lzma is None:
 			raise ImportError("Cannot open xz files: The lzma module is not available (use Python 3.3 or newer)")
 		return lzma.open(filename, mode)
 	elif filename.endswith('.gz'):
-		if _PY3:
-			if 't' in mode:
-				# gzip.open in Python 3.2 does not support modes 'rt' and 'wt''
-				return io.TextIOWrapper(gzip.open(filename, mode[0]))
-			else:
-				if 'r' in mode:
-					return io.BufferedReader(gzip.open(filename, mode))
-				else:
-					return io.BufferedWriter(gzip.open(filename, mode))
+		if _PY3 and 'r' in mode:
+			return gzip.open(filename, mode)
+		if sys.version_info[:2] == (2, 7):
+			buffered_reader = io.BufferedReader
+			buffered_writer = io.BufferedWriter
 		else:
-			# rb/rt are equivalent in Py2
-			if 'r' in mode:
-				try:
-					return PipedGzipReader(filename)
-				except OSError:
-					# gzip not installed
-					return buffered_reader(gzip.open(filename, mode))
-			else:
-				try:
-					return PipedGzipWriter(filename, mode)
-				except OSError:
-					return buffered_writer(gzip.open(filename, mode))
+			buffered_reader = lambda x: x
+			buffered_writer = lambda x: x
+		if 'r' in mode:
+			try:
+				return PipedGzipReader(filename, mode)
+			except OSError:
+				# gzip not installed
+				return buffered_reader(gzip.open(filename, mode))
+		else:
+			try:
+				return PipedGzipWriter(filename, mode)
+			except OSError:
+				return buffered_writer(gzip.open(filename, mode, compresslevel=compresslevel))
 	else:
+		# Python 2.6 and 2.7 have io.open, which we could use to make the returned
+		# object consistent with the one returned in Python 3, but reading a file
+		# with io.open() is 100 times slower (!) on Python 2.6, and still about
+		# three times slower on Python 2.7 (tested with "for _ in io.open(path): pass")
 		return open(filename, mode)

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/python-xopen.git



More information about the debian-med-commit mailing list