[Python-modules-commits] [ldif3] 03/08: Import ldif3_3.2.0.orig.tar.gz

Mon Jun 6 17:08:16 UTC 2016

This is an automated email from the git hooks/post-receive script.

fladi pushed a commit to branch master
in repository ldif3.

commit e85306cf4ea77a967468e37599f3435b09befbfd
Author: Michael Fladischer <FladischerMichael at fladi.at>
Date:   Mon Jun 6 14:03:22 2016 +0200

    Import ldif3_3.2.0.orig.tar.gz
---
 CHANGES.rst | 19 +++++++++++++++++-
 README.rst  | 14 ++++++++++----
 ldif3.py    | 64 +++++++++++++++++++++++++++++++++++++++++++++----------------
 setup.cfg   |  2 +-
 setup.py    | 25 ++++++++++++------------
 tests.py    | 36 ++++++++++++++++++++++++++++++++++
 6 files changed, 125 insertions(+), 35 deletions(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index c93b63f..0dd97a1 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -1,7 +1,24 @@
+3.2.0 (2016-06-03)
+------------------
+
+-   Overhaul the unicode support to also support binary data (e.g. images)
+    encoded in LDIF.
+
+    You can now pass an encoding to the parser which will be used to decode
+    values. If decoding fails, a bytestring will be returned.  If you pass an
+    encoding of ``None``, the parser will not try to do any conversion and
+    return bytes directly.
+
+    This change should be completely backwards compatible, as the parser now
+    gracefully handles a case where it crashed previously.
+
+    (See `#4 <https://github.com/xi/ldif3/issues/4>`_)
+
+
 3.1.1 (2015-09-20)
 ------------------
 
-- Allow empty values for attributes.
+-   Allow empty values for attributes.
 
 
 3.1.0 (2015-07-09)
diff --git a/README.rst b/README.rst
index 6e53e85..834b24e 100644
--- a/README.rst
+++ b/README.rst
@@ -32,11 +32,17 @@ Write LDIF to a file (or ``BytesIO``)::
 Unicode support
 ---------------
 
-The stream object that is passed to parser or writer must be a byte
-stream. It must use UTF-8 encoding as described in the spec.
+The stream object that is passed to parser or writer must be an ascii byte
+stream.
 
-The parsed objects (``dn`` and the keys and values of ``record``) on the
-other hand are unicode strings.
+The spec allows arbitrary data to be included in base64 encoding or via URL.
+There is no way of knowing the encoding of this data. To handle this, there
+are two modes:
+
+By default, the ``LDIFParser`` will try to interpret all values as UTF-8 and
+leave only the ones that fail to decode as bytes. But you can also pass an
+``encoding`` of ``None`` to the constructor, in which case the parser will not
+try to do any conversion and return bytes directly.
 
 
 .. _RFC 2849: https://tools.ietf.org/html/rfc2849
diff --git a/ldif3.py b/ldif3.py
index 1789896..a9e50eb 100644
--- a/ldif3.py
+++ b/ldif3.py
@@ -2,16 +2,6 @@
 
 from __future__ import unicode_literals
 
-__version__ = '3.1.1'
-
-__all__ = [
-    # constants
-    'LDIF_PATTERN',
-    # classes
-    'LDIFWriter',
-    'LDIFParser',
-]
-
 import base64
 import re
 import logging
@@ -24,6 +14,16 @@ except ImportError:  # pragma: nocover
     from urllib.parse import urlparse
     from urllib.request import urlopen
 
+__version__ = '3.2.0'
+
+__all__ = [
+    # constants
+    'LDIF_PATTERN',
+    # classes
+    'LDIFWriter',
+    'LDIFParser',
+]
+
 log = logging.getLogger('ldif3')
 
 ATTRTYPE_PATTERN = r'[\w;.-]+(;[\w_-]+)*'
@@ -73,14 +73,25 @@ class LDIFWriter(object):
 
     :type line_sep: bytearray
     :param line_sep: line separator
+
+    :type encoding: string
+    :param encoding: Encoding to use for converting values to bytes.  Note that
+        the spec requires the dn field to be UTF-8 encoded, so it does not
+        really make sense to use anything else.  Default: ``'utf8'``.
     """
 
     def __init__(
-            self, output_file, base64_attrs=[], cols=76, line_sep=b'\n'):
+            self,
+            output_file,
+            base64_attrs=[],
+            cols=76,
+            line_sep=b'\n',
+            encoding='utf8'):
         self._output_file = output_file
         self._base64_attrs = lower(base64_attrs)
         self._cols = cols
         self._line_sep = line_sep
+        self._encoding = encoding
 
         self.records_written = 0  #: number of records that have been written
 
@@ -107,18 +118,21 @@ class LDIFWriter(object):
         self._base64_attrs
         """
         return attr_type.lower() in self._base64_attrs or \
+            isinstance(attr_value, bytes) or \
             UNSAFE_STRING_RE.search(attr_value) is not None
 
     def _unparse_attr(self, attr_type, attr_value):
         """Write a single attribute type/value pair."""
         if self._needs_base64_encoding(attr_type, attr_value):
-            encoded = base64.encodestring(attr_value.encode('utf8'))\
+            if not isinstance(attr_value, bytes):
+                attr_value = attr_value.encode(self._encoding)
+            encoded = base64.encodestring(attr_value)\
                 .replace(b'\n', b'')\
-                .decode('utf8')
+                .decode('ascii')
             line = ':: '.join([attr_type, encoded])
         else:
             line = ': '.join([attr_type, attr_value])
-        self._fold_line(line.encode('utf8'))
+        self._fold_line(line.encode('ascii'))
 
     def _unparse_entry_record(self, entry):
         """
@@ -202,6 +216,13 @@ class LDIFParser(object):
     :type line_sep: bytearray
     :param line_sep: line separator
 
+    :type encoding: string
+    :param encoding: Encoding to use for converting values to unicode strings.
+        If decoding fails, the raw bytestring will be used instead. You can
+        also pass ``None`` which will skip decoding and always produce
+        bytestrings. Note that this only applies to entry values. ``dn`` and
+        entry keys will always be unicode strings.
+
     :type strict: boolean
     :param strict: If set to ``False``, recoverable parse errors will produce
         log warnings rather than exceptions.
@@ -222,11 +243,13 @@ class LDIFParser(object):
             ignored_attr_types=[],
             process_url_schemes=[],
             line_sep=b'\n',
+            encoding='utf8',
             strict=True):
         self._input_file = input_file
         self._process_url_schemes = lower(process_url_schemes)
         self._ignored_attr_types = lower(ignored_attr_types)
         self._line_sep = line_sep
+        self._encoding = encoding
         self._strict = strict
 
         self.line_counter = 0  #: number of lines that have been read
@@ -268,7 +291,8 @@ class LDIFParser(object):
     def _parse_attr(self, line):
         """Parse a single attribute type/value pair."""
         colon_pos = line.index(b':')
-        attr_type = line[0:colon_pos]
+        attr_type = line[0:colon_pos].decode('ascii')
+
         if line[colon_pos:].startswith(b'::'):
             attr_value = base64.decodestring(line[colon_pos + 2:])
         elif line[colon_pos:].startswith(b':<'):
@@ -280,7 +304,15 @@ class LDIFParser(object):
                     attr_value = urlopen(url.decode('ascii')).read()
         else:
             attr_value = line[colon_pos + 1:].strip()
-        return attr_type.decode('utf8'), attr_value.decode('utf8')
+
+        if attr_type == u'dn':
+            return attr_type, attr_value.decode('utf8')
+        elif self._encoding is not None:
+            try:
+                return attr_type, attr_value.decode(self._encoding)
+            except UnicodeError:
+                pass
+        return attr_type, attr_value
 
     def _error(self, msg):
         if self._strict:
diff --git a/setup.cfg b/setup.cfg
index 5d32289..9735485 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -8,5 +8,5 @@ cover-html=1
 cover-html-dir=.cover
 
 [flake8]
-exclude=.git,.tox,.env,build,dist
+exclude=.git,.tox,.env,build,dist,setup.py
 ignore=E127,E128
diff --git a/setup.py b/setup.py
index 31ce9c8..0e8ac85 100644
--- a/setup.py
+++ b/setup.py
@@ -1,25 +1,24 @@
 #!/usr/bin/env python
 
 import os
-
+import re
 from setuptools import setup
 
-curdir = os.path.dirname(os.path.abspath(__file__))
+DIRNAME = os.path.abspath(os.path.dirname(__file__))
+rel = lambda *parts: os.path.abspath(os.path.join(DIRNAME, *parts))
 
+README = open(rel('README.rst')).read()
+MAIN = open(rel('ldif3.py')).read()
+VERSION = re.search("__version__ = '([^']+)'", MAIN).group(1)
+NAME = re.search('^"""(.*) - (.*)"""', MAIN).group(1)
+DESCRIPTION = re.search('^"""(.*) - (.*)"""', MAIN).group(2)
 
-with open(os.path.join(curdir, 'ldif3.py')) as fh:
-    for line in fh:
-        if line.startswith('"""'):
-            name, description = line.rstrip().strip('"').split(' - ')
-        elif line.startswith('__version__'):
-            version = line.split('\'')[1]
-            break
 
 setup(
-    name=name,
-    version=version,
-    description=description,
-    long_description=open(os.path.join(curdir, 'README.rst')).read(),
+    name=NAME,
+    version=VERSION,
+    description=DESCRIPTION,
+    long_description=README,
     url='https://github.com/xi/ldif3',
     author='Tobias Bengfort',
     author_email='tobias.bengfort at posteo.de',
diff --git a/tests.py b/tests.py
index acd8198..f90153b 100644
--- a/tests.py
+++ b/tests.py
@@ -1,3 +1,5 @@
+# -*- encoding: utf8 -*-
+
 from __future__ import unicode_literals
 
 import unittest
@@ -242,6 +244,30 @@ class TestLDIFParser(unittest.TestCase):
             self.assertEqual(dn, DNS[i])
             self.assertEqual(record, RECORDS[i])
 
+    def test_parse_binary(self):
+        self.stream = BytesIO(b'dn: cn=Bjorn J Jensen\n'
+            b'jpegPhoto:: 8PLz\nfoo: bar')
+        self.p = ldif3.LDIFParser(self.stream)
+        items = list(self.p.parse())
+        self.assertEqual(items, [(
+            u'cn=Bjorn J Jensen', {
+                u'jpegPhoto': [b'\xf0\xf2\xf3'],
+                u'foo': [u'bar'],
+            }
+        )])
+
+    def test_parse_binary_raw(self):
+        self.stream = BytesIO(b'dn: cn=Bjorn J Jensen\n'
+            b'jpegPhoto:: 8PLz\nfoo: bar')
+        self.p = ldif3.LDIFParser(self.stream, encoding=None)
+        items = list(self.p.parse())
+        self.assertEqual(items, [(
+            'cn=Bjorn J Jensen', {
+                u'jpegPhoto': [b'\xf0\xf2\xf3'],
+                u'foo': [b'bar'],
+            }
+        )])
+
 
 class TestLDIFParserEmptyAttrValue(unittest.TestCase):
     def setUp(self):
@@ -337,3 +363,13 @@ class TestLDIFWriter(unittest.TestCase):
     def test_unparse_fail(self):
         with self.assertRaises(ValueError):
             self.w.unparse(DNS[0], 'foo')
+
+    def test_unparse_binary(self):
+        self.w.unparse(u'cn=Bjorn J Jensen', {u'jpegPhoto': [b'\xf0\xf2\xf3']})
+        value = self.stream.getvalue()
+        self.assertEqual(value, b'dn: cn=Bjorn J Jensen\njpegPhoto:: 8PLz\n\n')
+
+    def test_unparse_unicode_dn(self):
+        self.w.unparse(u'cn=Björn J Jensen', {u'foo': [u'bar']})
+        value = self.stream.getvalue()
+        self.assertEqual(value, b'dn:: Y249QmrDtnJuIEogSmVuc2Vu\nfoo: bar\n\n')

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/python-modules/packages/ldif3.git