[PATCH] Implement __unicode__ in Changelog and ChangeBlock
John Wright
jsw at debian.org
Fri Apr 15 10:32:28 UTC 2011
Closes: #561805
---
lib/debian/changelog.py | 64 ++++++++++++++++++++++++++++++------------
tests/test_changelog.py | 38 ++++++++++++++++++++++++-
tests/test_changelog_unicode | 11 +++++++
3 files changed, 94 insertions(+), 19 deletions(-)
create mode 100644 tests/test_changelog_unicode
diff --git a/lib/debian/changelog.py b/lib/debian/changelog.py
index 3786e7f..93c348f 100644
--- a/lib/debian/changelog.py
+++ b/lib/debian/changelog.py
@@ -67,7 +67,7 @@ class ChangeBlock(object):
def __init__(self, package=None, version=None, distributions=None,
urgency=None, urgency_comment=None, changes=None,
- author=None, date=None, other_pairs=None):
+ author=None, date=None, other_pairs=None, encoding='utf-8'):
self._raw_version = None
self._set_version(version)
self.package = package
@@ -79,6 +79,7 @@ class ChangeBlock(object):
self.date = date
self._trailing = []
self.other_pairs = other_pairs or {}
+ self._encoding = encoding
self._no_trailer = False
self._trailer_separator = " "
@@ -127,7 +128,8 @@ class ChangeBlock(object):
changes.append(change)
self._changes = changes
- def __str__(self):
+ def __unicode__(self):
+ # TODO(jsw): Switch to StringIO or a list to join at the end.
block = ""
if self.package is None:
raise ChangelogCreateError("Package not specified")
@@ -159,6 +161,9 @@ class ChangeBlock(object):
block += line + "\n"
return block
+ def __str__(self):
+ return unicode(self).encode(self._encoding)
+
topline = re.compile(r'^(\w%(name_chars)s*) \(([^\(\) \t]+)\)'
'((\s+%(name_chars)s+)+)\;'
% {'name_chars': '[-+0-9a-z.]'},
@@ -196,16 +201,26 @@ old_format_re8 = re.compile('^(?:\d+:)?\w[\w.+~-]*:?\s*$')
class Changelog(object):
- """Represents a debian/changelog file. You can ask it several things
- about the file.
- """
-
+ """Represents a debian/changelog file."""
+ # TODO(jsw): Avoid masking the 'file' built-in.
def __init__(self, file=None, max_blocks=None,
- allow_empty_author=False, strict=True):
- """Set up the Changelog for use. file is the contects of the
- changelog.
+ allow_empty_author=False, strict=True, encoding='utf-8'):
+ """Initializer.
+
+ Args:
+ file: The contents of the changelog, either as a str, unicode object,
+ or an iterator of lines (each of which is either a str or unicode)
+ max_blocks: The maximum number of blocks to parse from the input.
+ (Default: no limit)
+ allow_empty_author: Whether to allow an empty author in the trailer
+ line of a change block. (Default: False)
+ strict: Whether to raise an exception if there are errors, rather
+ than just issuing a warning. (Default: True)
+ encoding: If the input is a str or iterator of str, the encoding to
+ use when interpreting the input.
"""
+ self._encoding = encoding
self._blocks = []
self.initial_blank_lines = []
if file is not None:
@@ -223,29 +238,37 @@ class Changelog(object):
warnings.warn(message)
def parse_changelog(self, file, max_blocks=None,
- allow_empty_author=False, strict=True):
+ allow_empty_author=False, strict=True, encoding=None):
first_heading = "first heading"
next_heading_or_eof = "next heading of EOF"
start_of_change_data = "start of change data"
more_changes_or_trailer = "more change data or trailer"
slurp_to_end = "slurp to end"
+ encoding = encoding or self._encoding
+
+ if file is None:
+ self._parse_error('Empty changelog file.', strict)
+ return
+
self._blocks = []
self.initial_blank_lines = []
- current_block = ChangeBlock()
+ current_block = ChangeBlock(encoding=encoding)
changes = []
state = first_heading
old_state = None
if isinstance(file, basestring):
# Make sure the changelog file is not empty.
- if file is None or len(file.strip()) == 0:
+ if len(file.strip()) == 0:
self._parse_error('Empty changelog file.', strict)
return
file = file.splitlines()
for line in file:
+ if not isinstance(line, unicode):
+ line = line.decode(encoding)
# Support both lists of lines without the trailing newline and
# those with trailing newlines (e.g. when given a file object
# directly)
@@ -354,7 +377,7 @@ class Changelog(object):
current_block._changes = changes
self._blocks.append(current_block)
changes = []
- current_block = ChangeBlock()
+ current_block = ChangeBlock(encoding=encoding)
state = next_heading_or_eof
elif end_no_details_match is not None:
if not allow_empty_author:
@@ -364,7 +387,7 @@ class Changelog(object):
current_block._changes = changes
self._blocks.append(current_block)
changes = []
- current_block = ChangeBlock()
+ current_block = ChangeBlock(encoding=encoding)
state = next_heading_or_eof
elif blank_match is not None:
changes.append(line)
@@ -436,11 +459,15 @@ class Changelog(object):
def _raw_versions(self):
return [block._raw_version for block in self._blocks]
- def __str__(self):
- cl = "\n".join(self.initial_blank_lines)
+ def __unicode__(self):
+ pieces = []
+ pieces.append(u'\n'.join(self.initial_blank_lines))
for block in self._blocks:
- cl += str(block)
- return cl
+ pieces.append(unicode(block))
+ return u''.join(pieces)
+
+ def __str__(self):
+ return unicode(self).encode(self._encoding)
def __iter__(self):
return iter(self._blocks)
@@ -469,6 +496,7 @@ class Changelog(object):
date = property(lambda self: self._blocks[0].date, set_date)
def new_block(self, **kwargs):
+ kwargs.setdefault('encoding', self._encoding)
block = ChangeBlock(**kwargs)
block.add_trailing_line('')
self._blocks.insert(0, block)
diff --git a/tests/test_changelog.py b/tests/test_changelog.py
index 92376f2..84f7a02 100755
--- a/tests/test_changelog.py
+++ b/tests/test_changelog.py
@@ -1,5 +1,6 @@
#!/usr/bin/python
-
+# vim: fileencoding=utf-8
+#
# changelog.py -- Python module for Debian changelogs
# Copyright (C) 2006-7 James Westby <jw+debian at jameswestby.net>
# Copyright (C) 2008 Canonical Ltd.
@@ -159,6 +160,41 @@ class ChangelogTests(unittest.TestCase):
for c in (c1, c2, c3):
self.assertEqual(str(c), cl_data)
+ def test_utf8_encoded_file_input(self):
+ c = changelog.Changelog(open('test_changelog_unicode'))
+ u = unicode(c)
+ expected_u = u"""haskell-src-exts (1.8.2-3) unstable; urgency=low
+
+ * control: Use versioned Replaces: and Conflicts:
+
+ -- Marco Túlio Gontijo e Silva <marcot at debian.org> Wed, 05 May 2010 18:01:53 -0300
+
+haskell-src-exts (1.8.2-2) unstable; urgency=low
+
+ * debian/control: Rename -doc package.
+
+ -- Marco Túlio Gontijo e Silva <marcot at debian.org> Tue, 16 Mar 2010 10:59:48 -0300
+"""
+ self.assertEqual(u, expected_u)
+ self.assertEquals(str(c), u.encode('utf-8'))
+
+ def test_unicode_object_input(self):
+ c_str = open('test_changelog_unicode').read()
+ c_unicode = c_str.decode('utf-8')
+ c = changelog.Changelog(c_unicode)
+ self.assertEqual(unicode(c), c_unicode)
+ self.assertEqual(str(c), c_str)
+
+ def test_non_utf8_encoding(self):
+ c_str = open('test_changelog_unicode').read()
+ c_unicode = c_str.decode('utf-8')
+ c_latin1_str = c_unicode.encode('latin1')
+ c = changelog.Changelog(c_latin1_str, encoding='latin1')
+ self.assertEqual(unicode(c), c_unicode)
+ self.assertEqual(str(c), c_latin1_str)
+ for block in c:
+ self.assertEqual(str(block), unicode(block).encode('latin1'))
+
def test_block_iterator(self):
c = changelog.Changelog(open('test_changelog'))
self.assertEqual(map(str, c._blocks), map(str, c))
diff --git a/tests/test_changelog_unicode b/tests/test_changelog_unicode
new file mode 100644
index 0000000..8b52ed4
--- /dev/null
+++ b/tests/test_changelog_unicode
@@ -0,0 +1,11 @@
+haskell-src-exts (1.8.2-3) unstable; urgency=low
+
+ * control: Use versioned Replaces: and Conflicts:
+
+ -- Marco Túlio Gontijo e Silva <marcot at debian.org> Wed, 05 May 2010 18:01:53 -0300
+
+haskell-src-exts (1.8.2-2) unstable; urgency=low
+
+ * debian/control: Rename -doc package.
+
+ -- Marco Túlio Gontijo e Silva <marcot at debian.org> Tue, 16 Mar 2010 10:59:48 -0300
--
1.7.4.1
More information about the pkg-python-debian-maint
mailing list