[Python-modules-commits] [ldif3] 03/08: Import ldif3_3.2.0.orig.tar.gz
Michael Fladischer
fladi at moszumanska.debian.org
Mon Jun 6 17:08:16 UTC 2016
This is an automated email from the git hooks/post-receive script.
fladi pushed a commit to branch master
in repository ldif3.
commit e85306cf4ea77a967468e37599f3435b09befbfd
Author: Michael Fladischer <FladischerMichael at fladi.at>
Date: Mon Jun 6 14:03:22 2016 +0200
Import ldif3_3.2.0.orig.tar.gz
---
CHANGES.rst | 19 +++++++++++++++++-
README.rst | 14 ++++++++++----
ldif3.py | 64 +++++++++++++++++++++++++++++++++++++++++++++----------------
setup.cfg | 2 +-
setup.py | 25 ++++++++++++------------
tests.py | 36 ++++++++++++++++++++++++++++++++++
6 files changed, 125 insertions(+), 35 deletions(-)
diff --git a/CHANGES.rst b/CHANGES.rst
index c93b63f..0dd97a1 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -1,7 +1,24 @@
+3.2.0 (2016-06-03)
+------------------
+
+- Overhaul the unicode support to also support binary data (e.g. images)
+ encoded in LDIF.
+
+ You can now pass an encoding to the parser which will be used to decode
+ values. If decoding fails, a bytestring will be returned. If you pass an
+ encoding of ``None``, the parser will not try to do any conversion and
+ return bytes directly.
+
+ This change should be completely backwards compatible, as the parser now
+ gracefully handles a case where it crashed previously.
+
+ (See `#4 <https://github.com/xi/ldif3/issues/4>`_)
+
+
3.1.1 (2015-09-20)
------------------
-- Allow empty values for attributes.
+- Allow empty values for attributes.
3.1.0 (2015-07-09)
diff --git a/README.rst b/README.rst
index 6e53e85..834b24e 100644
--- a/README.rst
+++ b/README.rst
@@ -32,11 +32,17 @@ Write LDIF to a file (or ``BytesIO``)::
Unicode support
---------------
-The stream object that is passed to parser or writer must be a byte
-stream. It must use UTF-8 encoding as described in the spec.
+The stream object that is passed to parser or writer must be an ascii byte
+stream.
-The parsed objects (``dn`` and the keys and values of ``record``) on the
-other hand are unicode strings.
+The spec allows arbitrary data to be included in base64 encoding or via URL. There
+is no way of knowing the encoding of this data. To handle this, there are two
+modes:
+
+By default, the ``LDIFParser`` will try to interpret all values as UTF-8 and
+leave only the ones that fail to decode as bytes. But you can also pass an
+``encoding`` of ``None`` to the constructor, in which case the parser will not
+try to do any conversion and return bytes directly.
.. _RFC 2849: https://tools.ietf.org/html/rfc2849
diff --git a/ldif3.py b/ldif3.py
index 1789896..a9e50eb 100644
--- a/ldif3.py
+++ b/ldif3.py
@@ -2,16 +2,6 @@
from __future__ import unicode_literals
-__version__ = '3.1.1'
-
-__all__ = [
- # constants
- 'LDIF_PATTERN',
- # classes
- 'LDIFWriter',
- 'LDIFParser',
-]
-
import base64
import re
import logging
@@ -24,6 +14,16 @@ except ImportError: # pragma: nocover
from urllib.parse import urlparse
from urllib.request import urlopen
+__version__ = '3.2.0'
+
+__all__ = [
+ # constants
+ 'LDIF_PATTERN',
+ # classes
+ 'LDIFWriter',
+ 'LDIFParser',
+]
+
log = logging.getLogger('ldif3')
ATTRTYPE_PATTERN = r'[\w;.-]+(;[\w_-]+)*'
@@ -73,14 +73,25 @@ class LDIFWriter(object):
:type line_sep: bytearray
:param line_sep: line separator
+
+ :type encoding: string
+ :param encoding: Encoding to use for converting values to bytes. Note that
+ the spec requires the dn field to be UTF-8 encoded, so it does not
+ really make sense to use anything else. Default: ``'utf8'``.
"""
def __init__(
- self, output_file, base64_attrs=[], cols=76, line_sep=b'\n'):
+ self,
+ output_file,
+ base64_attrs=[],
+ cols=76,
+ line_sep=b'\n',
+ encoding='utf8'):
self._output_file = output_file
self._base64_attrs = lower(base64_attrs)
self._cols = cols
self._line_sep = line_sep
+ self._encoding = encoding
self.records_written = 0 #: number of records that have been written
@@ -107,18 +118,21 @@ class LDIFWriter(object):
self._base64_attrs
"""
return attr_type.lower() in self._base64_attrs or \
+ isinstance(attr_value, bytes) or \
UNSAFE_STRING_RE.search(attr_value) is not None
def _unparse_attr(self, attr_type, attr_value):
"""Write a single attribute type/value pair."""
if self._needs_base64_encoding(attr_type, attr_value):
- encoded = base64.encodestring(attr_value.encode('utf8'))\
+ if not isinstance(attr_value, bytes):
+ attr_value = attr_value.encode(self._encoding)
+ encoded = base64.encodestring(attr_value)\
.replace(b'\n', b'')\
- .decode('utf8')
+ .decode('ascii')
line = ':: '.join([attr_type, encoded])
else:
line = ': '.join([attr_type, attr_value])
- self._fold_line(line.encode('utf8'))
+ self._fold_line(line.encode('ascii'))
def _unparse_entry_record(self, entry):
"""
@@ -202,6 +216,13 @@ class LDIFParser(object):
:type line_sep: bytearray
:param line_sep: line separator
+ :type encoding: string
+ :param encoding: Encoding to use for converting values to unicode strings.
+ If decoding fails, the raw bytestring will be used instead. You can
+ also pass ``None`` which will skip decoding and always produce
+ bytestrings. Note that this only applies to entry values. ``dn`` and
+ entry keys will always be unicode strings.
+
:type strict: boolean
:param strict: If set to ``False``, recoverable parse errors will produce
log warnings rather than exceptions.
@@ -222,11 +243,13 @@ class LDIFParser(object):
ignored_attr_types=[],
process_url_schemes=[],
line_sep=b'\n',
+ encoding='utf8',
strict=True):
self._input_file = input_file
self._process_url_schemes = lower(process_url_schemes)
self._ignored_attr_types = lower(ignored_attr_types)
self._line_sep = line_sep
+ self._encoding = encoding
self._strict = strict
self.line_counter = 0 #: number of lines that have been read
@@ -268,7 +291,8 @@ class LDIFParser(object):
def _parse_attr(self, line):
"""Parse a single attribute type/value pair."""
colon_pos = line.index(b':')
- attr_type = line[0:colon_pos]
+ attr_type = line[0:colon_pos].decode('ascii')
+
if line[colon_pos:].startswith(b'::'):
attr_value = base64.decodestring(line[colon_pos + 2:])
elif line[colon_pos:].startswith(b':<'):
@@ -280,7 +304,15 @@ class LDIFParser(object):
attr_value = urlopen(url.decode('ascii')).read()
else:
attr_value = line[colon_pos + 1:].strip()
- return attr_type.decode('utf8'), attr_value.decode('utf8')
+
+ if attr_type == u'dn':
+ return attr_type, attr_value.decode('utf8')
+ elif self._encoding is not None:
+ try:
+ return attr_type, attr_value.decode(self._encoding)
+ except UnicodeError:
+ pass
+ return attr_type, attr_value
def _error(self, msg):
if self._strict:
diff --git a/setup.cfg b/setup.cfg
index 5d32289..9735485 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -8,5 +8,5 @@ cover-html=1
cover-html-dir=.cover
[flake8]
-exclude=.git,.tox,.env,build,dist
+exclude=.git,.tox,.env,build,dist,setup.py
ignore=E127,E128
diff --git a/setup.py b/setup.py
index 31ce9c8..0e8ac85 100644
--- a/setup.py
+++ b/setup.py
@@ -1,25 +1,24 @@
#!/usr/bin/env python
import os
-
+import re
from setuptools import setup
-curdir = os.path.dirname(os.path.abspath(__file__))
+DIRNAME = os.path.abspath(os.path.dirname(__file__))
+rel = lambda *parts: os.path.abspath(os.path.join(DIRNAME, *parts))
+README = open(rel('README.rst')).read()
+MAIN = open(rel('ldif3.py')).read()
+VERSION = re.search("__version__ = '([^']+)'", MAIN).group(1)
+NAME = re.search('^"""(.*) - (.*)"""', MAIN).group(1)
+DESCRIPTION = re.search('^"""(.*) - (.*)"""', MAIN).group(2)
-with open(os.path.join(curdir, 'ldif3.py')) as fh:
- for line in fh:
- if line.startswith('"""'):
- name, description = line.rstrip().strip('"').split(' - ')
- elif line.startswith('__version__'):
- version = line.split('\'')[1]
- break
setup(
- name=name,
- version=version,
- description=description,
- long_description=open(os.path.join(curdir, 'README.rst')).read(),
+ name=NAME,
+ version=VERSION,
+ description=DESCRIPTION,
+ long_description=README,
url='https://github.com/xi/ldif3',
author='Tobias Bengfort',
author_email='tobias.bengfort at posteo.de',
diff --git a/tests.py b/tests.py
index acd8198..f90153b 100644
--- a/tests.py
+++ b/tests.py
@@ -1,3 +1,5 @@
+# -*- encoding: utf8 -*-
+
from __future__ import unicode_literals
import unittest
@@ -242,6 +244,30 @@ class TestLDIFParser(unittest.TestCase):
self.assertEqual(dn, DNS[i])
self.assertEqual(record, RECORDS[i])
+ def test_parse_binary(self):
+ self.stream = BytesIO(b'dn: cn=Bjorn J Jensen\n'
+ b'jpegPhoto:: 8PLz\nfoo: bar')
+ self.p = ldif3.LDIFParser(self.stream)
+ items = list(self.p.parse())
+ self.assertEqual(items, [(
+ u'cn=Bjorn J Jensen', {
+ u'jpegPhoto': [b'\xf0\xf2\xf3'],
+ u'foo': [u'bar'],
+ }
+ )])
+
+ def test_parse_binary_raw(self):
+ self.stream = BytesIO(b'dn: cn=Bjorn J Jensen\n'
+ b'jpegPhoto:: 8PLz\nfoo: bar')
+ self.p = ldif3.LDIFParser(self.stream, encoding=None)
+ items = list(self.p.parse())
+ self.assertEqual(items, [(
+ 'cn=Bjorn J Jensen', {
+ u'jpegPhoto': [b'\xf0\xf2\xf3'],
+ u'foo': [b'bar'],
+ }
+ )])
+
class TestLDIFParserEmptyAttrValue(unittest.TestCase):
def setUp(self):
@@ -337,3 +363,13 @@ class TestLDIFWriter(unittest.TestCase):
def test_unparse_fail(self):
with self.assertRaises(ValueError):
self.w.unparse(DNS[0], 'foo')
+
+ def test_unparse_binary(self):
+ self.w.unparse(u'cn=Bjorn J Jensen', {u'jpegPhoto': [b'\xf0\xf2\xf3']})
+ value = self.stream.getvalue()
+ self.assertEqual(value, b'dn: cn=Bjorn J Jensen\njpegPhoto:: 8PLz\n\n')
+
+ def test_unparse_unicode_dn(self):
+ self.w.unparse(u'cn=Björn J Jensen', {u'foo': [u'bar']})
+ value = self.stream.getvalue()
+ self.assertEqual(value, b'dn:: Y249QmrDtnJuIEogSmVuc2Vu\nfoo: bar\n\n')
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/python-modules/packages/ldif3.git
More information about the Python-modules-commits
mailing list