[Python-modules-commits] r19042 - in packages/python-docutils/trunk/debian (4 files)

jwilk at users.alioth.debian.org
Sun Oct 23 20:05:51 UTC 2011


    Date: Sunday, October 23, 2011 @ 20:05:50
  Author: jwilk
Revision: 19042

Backport upstream patches to fix encoding issues with Python 3.

Added:
  packages/python-docutils/trunk/debian/patches/fix-error-reporting-encoding-issues.diff
  packages/python-docutils/trunk/debian/patches/fix-io-encoding-issues.diff
Modified:
  packages/python-docutils/trunk/debian/changelog
  packages/python-docutils/trunk/debian/patches/series

Modified: packages/python-docutils/trunk/debian/changelog
===================================================================
--- packages/python-docutils/trunk/debian/changelog	2011-10-23 20:01:26 UTC (rev 19041)
+++ packages/python-docutils/trunk/debian/changelog	2011-10-23 20:05:50 UTC (rev 19042)
@@ -11,8 +11,10 @@
     report.
   * Drop unused header file for rst2newlatex manual page.
   * Backport upstream patch to add missing import (add-missing-import.diff).
+  * Backport upstream patches to fix encoding issues with Python 3
+    (fix-*-encoding-issues.diff).
 
- -- Jakub Wilk <jwilk at debian.org>  Sun, 23 Oct 2011 22:00:49 +0200
+ -- Jakub Wilk <jwilk at debian.org>  Sun, 23 Oct 2011 22:05:17 +0200
 
 python-docutils (0.8.1-3) unstable; urgency=low
 

Added: packages/python-docutils/trunk/debian/patches/fix-error-reporting-encoding-issues.diff
===================================================================
--- packages/python-docutils/trunk/debian/patches/fix-error-reporting-encoding-issues.diff	                        (rev 0)
+++ packages/python-docutils/trunk/debian/patches/fix-error-reporting-encoding-issues.diff	2011-10-23 20:05:50 UTC (rev 19042)
@@ -0,0 +1,26 @@
+Description: Fix encoding issues in error reporting for Python 3.
+Origin: upstream, http://svn.berlios.de/viewvc/docutils/trunk/docutils/docutils/error_reporting.py?r1=7073&r2=7196
+Last-Update: 2011-10-23
+
+--- a/docutils/error_reporting.py
++++ b/docutils/error_reporting.py
+@@ -184,13 +184,17 @@
+         except UnicodeEncodeError:
+             self.stream.write(data.encode(self.encoding, self.encoding_errors))
+         except TypeError: # in Python 3, stderr expects unicode
+-            self.stream.write(unicode(data, self.encoding, self.decoding_errors))
++            if self.stream in (sys.stderr, sys.stdout):
++                self.stream.buffer.write(data) # write bytes to raw stream
++            else:
++                self.stream.write(unicode(data, self.encoding,
++                                          self.decoding_errors))
+ 
+     def close(self):
+         """
+         Close the error-output stream.
+ 
+-        Ignored if the stream is` sys.stderr` or `sys.stdout` or has no 
++        Ignored if the stream is` sys.stderr` or `sys.stdout` or has no
+         close() method.
+         """
+         if self.stream in (sys.stdout, sys.stderr):

Added: packages/python-docutils/trunk/debian/patches/fix-io-encoding-issues.diff
===================================================================
--- packages/python-docutils/trunk/debian/patches/fix-io-encoding-issues.diff	                        (rev 0)
+++ packages/python-docutils/trunk/debian/patches/fix-io-encoding-issues.diff	2011-10-23 20:05:50 UTC (rev 19042)
@@ -0,0 +1,139 @@
+Description: Fix encoding issues in file I/O for Python 3.
+Bug: http://sourceforge.net/tracker/?func=detail&aid=3395948&group_id=38414&atid=422030
+Origin: upstream, http://svn.berlios.de/viewvc/docutils/trunk/docutils/docutils/io.py?r1=7073&r2=7196
+Last-Update: 2011-10-23
+
+--- a/docutils/io.py
++++ b/docutils/io.py
+@@ -10,6 +10,7 @@
+ __docformat__ = 'reStructuredText'
+ 
+ import sys
++import os
+ import re
+ import codecs
+ from docutils import TransformSpec
+@@ -84,10 +85,9 @@
+                 # Apply heuristics only if no encoding is explicitly given and
+                 # no BOM found.  Start with UTF-8, because that only matches
+                 # data that *IS* UTF-8:
+-                encodings = [enc for enc in ('utf-8',
+-                                             locale_encoding, # can be None
+-                                             'latin-1') # fallback encoding
+-                             if enc]
++                encodings = ['utf-8', 'latin-1']
++                if locale_encoding:
++                    encodings.insert(1, locale_encoding)
+         for enc in encodings:
+             try:
+                 decoded = unicode(data, enc, self.error_handler)
+@@ -105,7 +105,7 @@
+     coding_slug = re.compile(b("coding[:=]\s*([-\w.]+)"))
+     """Encoding declaration pattern."""
+ 
+-    byte_order_marks = ((codecs.BOM_UTF8, 'utf-8'), # actually 'utf-8-sig'
++    byte_order_marks = ((codecs.BOM_UTF8, 'utf-8'), # 'utf-8-sig' new in v2.5
+                         (codecs.BOM_UTF16_BE, 'utf-16-be'),
+                         (codecs.BOM_UTF16_LE, 'utf-16-le'),)
+     """Sequence of (start_bytes, encoding) tuples for encoding detection.
+@@ -224,6 +224,15 @@
+                     sys.exit(1)
+             else:
+                 self.source = sys.stdin
++        elif (sys.version_info >= (3,0) and
++              self.encoding and hasattr(self.source, 'encoding') and
++              self.encoding != self.source.encoding and
++              codecs.lookup(self.encoding) !=
++              codecs.lookup(self.source.encoding)):
++            # TODO: re-open, warn or raise error?
++            raise UnicodeError('Encoding clash: encoding given is "%s" '
++                               'but source is opened with encoding "%s".' %
++                               (self.encoding, self.source.encoding))
+         if not source_path:
+             try:
+                 self.source_path = self.source.name
+@@ -234,8 +243,25 @@
+         """
+         Read and decode a single file and return the data (Unicode string).
+         """
+-        try:
+-            data = self.source.read()
++        try: # In Python < 2.5, try...except has to be nested in try...finally.
++            try:
++                if self.source is sys.stdin and sys.version_info >= (3,0):
++                    # read as binary data to circumvent auto-decoding
++                    data = self.source.buffer.read()
++                    # normalize newlines
++                    data = b('\n').join(data.splitlines()) + b('\n')
++                else:
++                    data = self.source.read()
++            except (UnicodeError, LookupError), err: # (in Py3k read() decodes)
++                if not self.encoding and self.source_path:
++                    # re-read in binary mode and decode with heuristics
++                    b_source = open(self.source_path, 'rb')
++                    data = b_source.read()
++                    b_source.close()
++                    # normalize newlines
++                    data = b('\n').join(data.splitlines()) + b('\n')
++                else:
++                    raise
+         finally:
+             if self.autoclose:
+                 self.close()
+@@ -245,12 +271,7 @@
+         """
+         Return lines of a single file as list of Unicode strings.
+         """
+-        try:
+-            lines = self.source.readlines()
+-        finally:
+-            if self.autoclose:
+-                self.close()
+-        return [self.decode(line) for line in lines]
++        return self.read().splitlines(True)
+ 
+     def close(self):
+         if self.source is not sys.stdin:
+@@ -317,20 +338,34 @@
+     def write(self, data):
+         """Encode `data`, write it to a single file, and return it.
+ 
+-        In Python 3, a (unicode) string is returned.
++        In Python 3, `data` is returned unchanged.
+         """
+-        if sys.version_info >= (3,0):
+-            output = data # in py3k, write expects a (Unicode) string
+-        else:
+-            output = self.encode(data)
++        if sys.version_info < (3,0):
++            data = self.encode(data)
+         if not self.opened:
+             self.open()
+-        try:
+-            self.destination.write(output)
++        try: # In Python < 2.5, try...except has to be nested in try...finally.
++            try:
++                if (sys.version_info >= (3,0) and self.encoding and
++                    hasattr(self.destination,'encoding') and
++                    self.encoding != self.destination.encoding and
++                    codecs.lookup(self.encoding) !=
++                    codecs.lookup(self.destination.encoding)):
++                    # encode self, write bytes
++                    bdata = self.encode(data)
++                    if os.linesep != '\n':
++                        bdata = bdata.replace('\n', os.linesep)
++                    sys.stdout.buffer.write(bdata)
++                else:
++                    self.destination.write(data)
++            except (UnicodeError, LookupError), err: # can only happen in py3k
++                raise UnicodeError(
++                    'Unable to encode output data. output-encoding is: '
++                    '%s.\n(%s)' % (self.encoding, ErrorString(err)))
+         finally:
+             if self.autoclose:
+                 self.close()
+-        return output
++        return data
+ 
+     def close(self):
+         if self.destination not in (sys.stdout, sys.stderr):

Modified: packages/python-docutils/trunk/debian/patches/series
===================================================================
--- packages/python-docutils/trunk/debian/patches/series	2011-10-23 20:01:26 UTC (rev 19041)
+++ packages/python-docutils/trunk/debian/patches/series	2011-10-23 20:05:50 UTC (rev 19042)
@@ -7,4 +7,6 @@
 languages-get_language-2nd-arg-optional.diff
 testall-no-stdout-stderr-redirect.diff
 add-missing-import.diff
+fix-error-reporting-encoding-issues.diff
+fix-io-encoding-issues.diff
 move-data-to-usr-share.diff




More information about the Python-modules-commits mailing list