Bug#597120: [PATCH] Avoid dumping unparseable data

John Wright jsw at debian.org
Sat Sep 18 08:15:42 UTC 2010


Add an input validation method that is called by the default __setitem__,
and add some validation at output time for multivalued fields (since
their input is a mutable list, which makes it unsuitable for validation
at input time).

Closes: #597120
---
 debian/changelog     |    6 ++++++
 lib/debian/deb822.py |   45 ++++++++++++++++++++++++++++++++++++++++++---
 tests/test_deb822.py |   28 ++++++++++++++++++++++++++++
 3 files changed, 76 insertions(+), 3 deletions(-)

diff --git a/debian/changelog b/debian/changelog
index 0292524..492b5df 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+python-debian (0.1.19) UNRELEASED; urgency=low
+
+  * Avoid dumping unparseable data. (Closes: #597120)
+
+ -- John Wright <jsw at debian.org>  Sat, 18 Sep 2010 00:47:04 -0600
+
 python-debian (0.1.18) unstable; urgency=low
 
   * Support installation together with older versions of python-apt.
diff --git a/lib/debian/deb822.py b/lib/debian/deb822.py
index a0cad69..fc87124 100644
--- a/lib/debian/deb822.py
+++ b/lib/debian/deb822.py
@@ -406,8 +406,9 @@ class Deb822(Deb822Dict):
             value = self.get_as_string(key)
             if not value or value[0] == '\n':
                 # Avoid trailing whitespace after "Field:" if it's on its own
-                # line or the value is empty
-                # XXX Uh, really print value if value == '\n'?
+                # line or the value is empty.  We don't have to worry about the
+                # case where value == '\n', since we ensure that is not the
+                # case in __setitem__.
                 entry = '%s:%s\n' % (key, value)
             else:
                 entry = '%s: %s\n' % (key, value)
@@ -588,6 +589,31 @@ class Deb822(Deb822Dict):
 
         return self.gpg_info
 
+    def validate_input(self, key, value):
+        """Raise ValueError if value is not a valid value for key
+
+        Subclasses that do interesting things for different keys may wish to
+        override this method.
+        """
+
+        # The value cannot end in a newline (if it did, dumping the object
+        # would result in multiple stanzas)
+        if value.endswith('\n'):
+            raise ValueError("value must not end in '\\n'")
+
+        # Make sure there are no blank lines (actually, the first one is
+        # allowed to be blank, but no others), and each subsequent line starts
+        # with whitespace
+        for line in value.splitlines()[1:]:
+            if not line:
+                raise ValueError("value must not have blank lines")
+            if not line[0].isspace():
+                raise ValueError("each line must start with whitespace")
+
+    def __setitem__(self, key, value):
+        self.validate_input(key, value)
+        Deb822Dict.__setitem__(self, key, value)
+
 ###
 
 # XXX check what happens if input contains more that one signature
@@ -892,6 +918,16 @@ class _multivalued(Deb822):
             for line in filter(None, contents.splitlines()):
                 updater_method(Deb822Dict(zip(fields, line.split())))
 
+    def validate_input(self, key, value):
+        if key.lower() in self._multivalued_fields:
+            # It's difficult to write a validator for multivalued fields, and
+            # basically futile, since we allow mutable lists.  In any case,
+            # with sanity checking in get_as_string, we shouldn't ever output
+            # unparseable data.
+            pass
+        else:
+            Deb822.validate_input(self, key, value)
+
     def get_as_string(self, key):
         keyl = key.lower()
         if keyl in self._multivalued_fields:
@@ -909,13 +945,16 @@ class _multivalued(Deb822):
                 field_lengths = {}
             for item in array:
                 for x in order:
-                    raw_value = str(item[x])
+                    raw_value = unicode(item[x])
                     try:
                         length = field_lengths[keyl][x]
                     except KeyError:
                         value = raw_value
                     else:
                         value = (length - len(raw_value)) * " " + raw_value
+                    if "\n" in value:
+                        raise ValueError("'\\n' not allowed in component of "
+                                         "multivalued field %s" % key)
                     fd.write(" %s" % value)
                 fd.write("\n")
             return fd.getvalue().rstrip("\n")
diff --git a/tests/test_deb822.py b/tests/test_deb822.py
index 891f4cd..64fd77a 100755
--- a/tests/test_deb822.py
+++ b/tests/test_deb822.py
@@ -726,6 +726,34 @@ Description: python modules to work with Debian-related data formats
             self.assertEqual(p2['uploaders'],
                              u'Frank Küster <frank at debian.org>')
 
+    @staticmethod
+    def _dictset(d, key, value):
+        d[key] = value
+
+    def test_field_value_ends_in_newline(self):
+        """Field values are not allowed to end with newlines"""
+
+        d = deb822.Deb822()
+        self.assertRaises(ValueError, self._dictset, d, 'foo', 'bar\n')
+        self.assertRaises(ValueError, self._dictset, d, 'foo', 'bar\nbaz\n')
+
+    def test_field_value_contains_blank_line(self):
+        """Field values are not allowed to contain blank lines"""
+
+        d = deb822.Deb822()
+        self.assertRaises(ValueError, self._dictset, d, 'foo', 'bar\n\nbaz')
+        self.assertRaises(ValueError, self._dictset, d, 'foo', '\n\nbaz')
+
+    def test_multivalued_field_contains_newline(self):
+        """Multivalued field components are not allowed to contain newlines"""
+
+        d = deb822.Dsc()
+        # We don't check at set time, since one could easily modify the list
+        # without deb822 knowing.  We instead check at get time.
+        d['Files'] = [{'md5sum': 'deadbeef', 'size': '9605', 'name': 'bad\n'}]
+        self.assertRaises(ValueError, d.get_as_string, 'files')
+
+
 class TestPkgRelations(unittest.TestCase):
 
     def test_packages(self):
-- 
1.7.1






More information about the pkg-python-debian-maint mailing list