Bug#597120: [PATCH] Avoid dumping unparseable data
John Wright
jsw at debian.org
Sat Sep 18 08:15:42 UTC 2010
Add an input validation method that is called by the default __setitem__,
and add some validation at output time for multivalued fields (since
their input is a mutable list which makes it unsuitable for validation
at input time).
Closes: #597120
---
debian/changelog | 6 ++++++
lib/debian/deb822.py | 45 ++++++++++++++++++++++++++++++++++++++++++---
tests/test_deb822.py | 28 ++++++++++++++++++++++++++++
3 files changed, 76 insertions(+), 3 deletions(-)
diff --git a/debian/changelog b/debian/changelog
index 0292524..492b5df 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+python-debian (0.1.19) UNRELEASED; urgency=low
+
+ * Avoid dumping unparseable data. (Closes: #597120)
+
+ -- John Wright <jsw at debian.org> Sat, 18 Sep 2010 00:47:04 -0600
+
python-debian (0.1.18) unstable; urgency=low
* Support installation together with older versions of python-apt.
diff --git a/lib/debian/deb822.py b/lib/debian/deb822.py
index a0cad69..fc87124 100644
--- a/lib/debian/deb822.py
+++ b/lib/debian/deb822.py
@@ -406,8 +406,9 @@ class Deb822(Deb822Dict):
value = self.get_as_string(key)
if not value or value[0] == '\n':
# Avoid trailing whitespace after "Field:" if it's on its own
- # line or the value is empty
- # XXX Uh, really print value if value == '\n'?
+ # line or the value is empty. We don't have to worry about the
+ # case where value == '\n', since we ensure that is not the
+ # case in __setitem__.
entry = '%s:%s\n' % (key, value)
else:
entry = '%s: %s\n' % (key, value)
@@ -588,6 +589,31 @@ class Deb822(Deb822Dict):
return self.gpg_info
+ def validate_input(self, key, value):
+ """Raise ValueError if value is not a valid value for key
+
+ Subclasses that do interesting things for different keys may wish to
+ override this method.
+ """
+
+ # The value cannot end in a newline (if it did, dumping the object
+ # would result in multiple stanzas)
+ if value.endswith('\n'):
+ raise ValueError("value must not end in '\\n'")
+
+ # Make sure there are no blank lines (actually, the first one is
+ # allowed to be blank, but no others), and each subsequent line starts
+ # with whitespace
+ for line in value.splitlines()[1:]:
+ if not line:
+ raise ValueError("value must not have blank lines")
+ if not line[0].isspace():
+ raise ValueError("each line must start with whitespace")
+
+ def __setitem__(self, key, value):
+ self.validate_input(key, value)
+ Deb822Dict.__setitem__(self, key, value)
+
###
# XXX check what happens if input contains more that one signature
@@ -892,6 +918,16 @@ class _multivalued(Deb822):
for line in filter(None, contents.splitlines()):
updater_method(Deb822Dict(zip(fields, line.split())))
+ def validate_input(self, key, value):
+ if key.lower() in self._multivalued_fields:
+ # It's difficult to write a validator for multivalued fields, and
+ # basically futile, since we allow mutable lists. In any case,
+ # with sanity checking in get_as_string, we shouldn't ever output
+ # unparseable data.
+ pass
+ else:
+ Deb822.validate_input(self, key, value)
+
def get_as_string(self, key):
keyl = key.lower()
if keyl in self._multivalued_fields:
@@ -909,13 +945,16 @@ class _multivalued(Deb822):
field_lengths = {}
for item in array:
for x in order:
- raw_value = str(item[x])
+ raw_value = unicode(item[x])
try:
length = field_lengths[keyl][x]
except KeyError:
value = raw_value
else:
value = (length - len(raw_value)) * " " + raw_value
+ if "\n" in value:
+ raise ValueError("'\\n' not allowed in component of "
+ "multivalued field %s" % key)
fd.write(" %s" % value)
fd.write("\n")
return fd.getvalue().rstrip("\n")
diff --git a/tests/test_deb822.py b/tests/test_deb822.py
index 891f4cd..64fd77a 100755
--- a/tests/test_deb822.py
+++ b/tests/test_deb822.py
@@ -726,6 +726,34 @@ Description: python modules to work with Debian-related data formats
self.assertEqual(p2['uploaders'],
u'Frank Küster <frank at debian.org>')
+ @staticmethod
+ def _dictset(d, key, value):
+ d[key] = value
+
+ def test_field_value_ends_in_newline(self):
+ """Field values are not allowed to end with newlines"""
+
+ d = deb822.Deb822()
+ self.assertRaises(ValueError, self._dictset, d, 'foo', 'bar\n')
+ self.assertRaises(ValueError, self._dictset, d, 'foo', 'bar\nbaz\n')
+
+ def test_field_value_contains_blank_line(self):
+ """Field values are not allowed to contain blank lines"""
+
+ d = deb822.Deb822()
+ self.assertRaises(ValueError, self._dictset, d, 'foo', 'bar\n\nbaz')
+ self.assertRaises(ValueError, self._dictset, d, 'foo', '\n\nbaz')
+
+ def test_multivalued_field_contains_newline(self):
+ """Multivalued field components are not allowed to contain newlines"""
+
+ d = deb822.Dsc()
+ # We don't check at set time, since one could easily modify the list
+ # without deb822 knowing. We instead check at get time.
+ d['Files'] = [{'md5sum': 'deadbeef', 'size': '9605', 'name': 'bad\n'}]
+ self.assertRaises(ValueError, d.get_as_string, 'files')
+
+
class TestPkgRelations(unittest.TestCase):
def test_packages(self):
--
1.7.1
More information about the pkg-python-debian-maint
mailing list