[PATCH 07/12] copyright: Add a FilesParagraph class.

John Wright jsw at debian.org
Sun Aug 31 21:26:13 UTC 2014


From: John Wright <jsw at google.com>

A FilesParagraph matches a set of files via a list of shell-like globs,
and keeps track of their copyright and license information.
---
 lib/debian/copyright.py | 115 +++++++++++++++++++++++++++++-
 tests/test_copyright.py | 183 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 297 insertions(+), 1 deletion(-)

diff --git a/lib/debian/copyright.py b/lib/debian/copyright.py
index 7c62167..7e82e0b 100644
--- a/lib/debian/copyright.py
+++ b/lib/debian/copyright.py
@@ -28,8 +28,8 @@ from __future__ import unicode_literals
 
 import collections
 import itertools
+import io
 import re
-import string
 import warnings
 
 from debian import deb822
@@ -257,6 +257,119 @@ class License(collections.namedtuple('License', 'synopsis text')):
     # TODO(jsw): Provide methods to look up license text for known licenses?
 
 
+def globs_to_re(globs):
+    r"""Returns an re object for the given globs.
+
+    Only * and ? wildcards are supported.  Literal * and ? may be matched via
+    \* and \?, respectively.  A literal backslash is matched via \\.  Any
+    other character after a backslash is forbidden.
+
+    With no globs at all, the pattern is just \Z, which matches only ''.
+
+    Raises ValueError if any of the globs is illegal.
+    """
+    buf = io.StringIO()
+    for i, glob in enumerate(globs):
+        if i != 0:
+            buf.write('|')
+        i = 0  # From here on, i is the character index within this glob.
+        n = len(glob)
+        while i < n:
+            c = glob[i]
+            i += 1
+            if c == '*':
+                buf.write('.*')  # Compiled with DOTALL, so this spans \n too.
+            elif c == '?':
+                buf.write('.')
+            elif c == '\\':
+                if i < n:
+                    c = glob[i]
+                    i += 1
+                else:
+                    raise ValueError('single backslash not allowed at end')
+                if c in r'\?*':  # The only characters that may be escaped.
+                    buf.write(re.escape(c))
+                else:
+                    raise ValueError(r'invalid escape sequence: \%s' % c)
+            else:
+                buf.write(re.escape(c))
+
+    # Anchor at end of string; \Z rather than $ so filenames with \n work.
+    # NOTE(review): \Z applies only to the last '|' alternative; group first?
+    buf.write(r'\Z')
+    return re.compile(buf.getvalue(), re.MULTILINE | re.DOTALL)
+
+
+class FilesParagraph(deb822.RestrictedWrapper):
+    """Represents a Files paragraph of a debian/copyright file.
+
+    This kind of paragraph is used to specify the copyright and license for a
+    particular set of files in the package.
+    """
+
+    def __init__(self, data, _internal_validate=True):
+        super(FilesParagraph, self).__init__(data)
+
+        if _internal_validate:
+            if 'Files' not in data:
+                raise ValueError('"Files" field required')
+            # For the other "required" fields, we just warn for now.  Perhaps
+            # these should be upgraded to exceptions (potentially protected by
+            # a "strict" param).
+            if 'Copyright' not in data:
+                warnings.warn('Files paragraph missing Copyright field')
+            if 'License' not in data:
+                warnings.warn('Files paragraph missing License field')
+
+            if not self.files:
+                warnings.warn('Files paragraph has empty Files field')
+
+        self.__cached_files_pat = (None, None)  # (Files text, compiled re)
+
+    @classmethod
+    def create(cls, files, copyright, license):
+        """Create a new FilesParagraph from its required parts.
+
+        :param files: The list of file globs.
+        :param copyright: The copyright for the files (free-form text).
+        :param license: The License for the files.
+        """
+        p = cls(deb822.Deb822(), _internal_validate=False)  # fields set below
+        p.files = files
+        p.copyright = copyright
+        p.license = license
+        return p
+
+    def files_pattern(self):
+        """Returns a regular expression equivalent to the Files globs.
+
+        Caches the result until files is set to a different value.
+
+        Raises ValueError if any of the globs are invalid.
+        """
+        files_str = self['files']  # Raw field text doubles as the cache key.
+        if self.__cached_files_pat[0] != files_str:
+            self.__cached_files_pat = (files_str, globs_to_re(self.files))
+        return self.__cached_files_pat[1]
+
+    def matches(self, filename):
+        """Returns True iff filename is matched by a glob in Files."""
+        pat = self.files_pattern()
+        return pat.match(filename) is not None
+
+    files = deb822.RestrictedField(
+        'Files', from_str=_SpaceSeparated.from_str,
+        to_str=_SpaceSeparated.to_str, allow_none=False)
+
+    copyright = deb822.RestrictedField('Copyright', allow_none=False)
+
+    license = deb822.RestrictedField(
+        'License', from_str=License.from_str, to_str=License.to_str,
+        allow_none=False)
+
+    comment = deb822.RestrictedField('Comment')
+
+
 class LicenseParagraph(deb822.RestrictedWrapper):
     """Represents a standalone license paragraph of a debian/copyright file.
 
diff --git a/tests/test_copyright.py b/tests/test_copyright.py
index 2632db5..29c9f10 100755
--- a/tests/test_copyright.py
+++ b/tests/test_copyright.py
@@ -19,6 +19,7 @@
 
 from __future__ import unicode_literals
 
+import re
 import sys
 import unittest
 
@@ -388,6 +389,188 @@ class LicenseParagraphTest(unittest.TestCase):
         with self.assertRaises(deb822.RestrictedFieldError):
             lp['Files'] = 'foo/*'
 
+class GlobsToReTest(unittest.TestCase):
+
+    def setUp(self):
+        self.flags = re.MULTILINE | re.DOTALL  # Same flags as globs_to_re.
+
+    def assertReEqual(self, a, b):  # Same pattern text and same flags.
+        self.assertEqual(a.pattern, b.pattern)
+        self.assertEqual(a.flags, b.flags)
+
+    def test_empty(self):
+        self.assertReEqual(
+            re.compile(r'\Z', self.flags), copyright.globs_to_re([]))
+
+    def test_star(self):
+        pat = copyright.globs_to_re(['*'])
+        self.assertReEqual(re.compile(r'.*\Z', self.flags), pat)
+        self.assertTrue(pat.match('foo'))
+        self.assertTrue(pat.match('foo/bar/baz'))
+
+    def test_star_prefix(self):
+        e = re.escape
+        pat = copyright.globs_to_re(['*.in'])
+        expected = re.compile('.*' + e('.in') + r'\Z', self.flags)
+        self.assertReEqual(expected, pat)
+        self.assertFalse(pat.match('foo'))
+        self.assertFalse(pat.match('in'))
+        self.assertTrue(pat.match('Makefile.in'))
+        self.assertFalse(pat.match('foo/bar/in'))
+        self.assertTrue(pat.match('foo/bar/Makefile.in'))
+
+    def test_star_prefix_with_slash(self):
+        e = re.escape
+        pat = copyright.globs_to_re(['*/Makefile.in'])
+        expected = re.compile('.*' + e('/Makefile.in') + r'\Z', self.flags)
+        self.assertReEqual(expected, pat)
+        self.assertFalse(pat.match('foo'))
+        self.assertFalse(pat.match('in'))
+        self.assertFalse(pat.match('foo/bar/in'))
+        self.assertTrue(pat.match('foo/Makefile.in'))
+        self.assertTrue(pat.match('foo/bar/Makefile.in'))
+
+    def test_question_mark(self):
+        e = re.escape
+        pat = copyright.globs_to_re(['foo/messages.??_??.txt'])
+        expected = re.compile(
+            e('foo/messages.') + '..' + e('_') + '..' + e('.txt') + r'\Z',
+            self.flags)
+        self.assertReEqual(expected, pat)
+        self.assertFalse(pat.match('messages.en_US.txt'))
+        self.assertTrue(pat.match('foo/messages.en_US.txt'))
+        self.assertTrue(pat.match('foo/messages.ja_JP.txt'))
+        self.assertFalse(pat.match('foo/messages_ja_JP.txt'))
+
+    def test_multi_literal(self):
+        e = re.escape
+        pat = copyright.globs_to_re(['Makefile.in', 'foo/bar'])
+        expected = re.compile(
+            e('Makefile.in') + '|' + e('foo/bar') + r'\Z', self.flags)
+        self.assertReEqual(expected, pat)
+        self.assertTrue(pat.match('Makefile.in'))
+        self.assertFalse(pat.match('foo/Makefile.in'))
+        self.assertTrue(pat.match('foo/bar'))
+        self.assertFalse(pat.match('foo/barbaz'))
+        self.assertFalse(pat.match('foo/bar/baz'))
+        self.assertFalse(pat.match('a/foo/bar'))
+
+    def test_multi_wildcard(self):
+        e = re.escape
+        pat = copyright.globs_to_re(
+            ['debian/*', '*.Debian', 'translations/fr_??/*'])
+        expected = re.compile(
+            e('debian/') + '.*|.*' + e('.Debian') + '|' +
+            e('translations/fr_') + '..' + e('/') + r'.*\Z',
+            self.flags)
+        self.assertReEqual(expected, pat)
+        self.assertTrue(pat.match('debian/rules'))
+        self.assertFalse(pat.match('other/debian/rules'))
+        self.assertTrue(pat.match('README.Debian'))
+        self.assertTrue(pat.match('foo/bar/README.Debian'))
+        self.assertTrue(pat.match('translations/fr_FR/a.txt'))
+        self.assertTrue(pat.match('translations/fr_BE/a.txt'))
+        self.assertFalse(pat.match('translations/en_US/a.txt'))
+
+    def test_literal_backslash(self):
+        e = re.escape
+        pat = copyright.globs_to_re([r'foo/bar\\baz.c', r'bar/quux\\'])
+        expected = re.compile(
+            e(r'foo/bar\baz.c') + '|' + e('bar/quux\\') + r'\Z', self.flags)
+        self.assertReEqual(expected, pat)
+
+        self.assertFalse(pat.match('foo/bar.baz.c'))
+        self.assertFalse(pat.match('foo/bar/baz.c'))
+        self.assertTrue(pat.match(r'foo/bar\baz.c'))
+        self.assertFalse(pat.match('bar/quux'))
+        self.assertTrue(pat.match('bar/quux\\'))
+
+    def test_illegal_backslash(self):  # args checked after each with block
+        with self.assertRaises(ValueError) as cm:
+            copyright.globs_to_re([r'foo/a\b.c'])
+        self.assertEqual((r'invalid escape sequence: \b',),
+                         cm.exception.args)
+
+        with self.assertRaises(ValueError) as cm:
+            copyright.globs_to_re(['foo/bar\\'])
+        self.assertEqual(('single backslash not allowed at end',),
+                         cm.exception.args)
+
+
+class FilesParagraphTest(unittest.TestCase):
+
+    def setUp(self):  # Paragraph with Files, Copyright and License set.
+        self.prototype = deb822.Deb822()
+        self.prototype['Files'] = '*'
+        self.prototype['Copyright'] = 'Foo'
+        self.prototype['License'] = 'ISC'
+
+    def test_files_property(self):
+        fp = copyright.FilesParagraph(self.prototype)
+        self.assertEqual(('*',), fp.files)
+
+        fp.files = ['debian/*']
+        self.assertEqual(('debian/*',), fp.files)
+        self.assertEqual('debian/*', fp['files'])
+
+        fp.files = ['src/foo/*', 'src/bar/*']
+        self.assertEqual(('src/foo/*', 'src/bar/*'), fp.files)
+        self.assertEqual('src/foo/* src/bar/*', fp['files'])
+
+        with self.assertRaises(TypeError):  # files is allow_none=False.
+            fp.files = None
+
+        self.prototype['Files'] = 'foo/*\tbar/*\n\tbaz/*\n quux/*'
+        fp = copyright.FilesParagraph(self.prototype)
+        self.assertEqual(('foo/*', 'bar/*', 'baz/*', 'quux/*'), fp.files)
+
+    def test_license_property(self):
+        fp = copyright.FilesParagraph(self.prototype)
+        self.assertEqual(copyright.License('ISC'), fp.license)
+        fp.license = copyright.License('ISC', '[LICENSE TEXT]')
+        self.assertEqual(copyright.License('ISC', '[LICENSE TEXT]'), fp.license)
+        self.assertEqual('ISC\n [LICENSE TEXT]', fp['license'])
+
+        with self.assertRaises(TypeError):  # license is allow_none=False.
+            fp.license = None
+
+    def test_matches(self):
+        fp = copyright.FilesParagraph(self.prototype)
+        self.assertTrue(fp.matches('foo/bar.cc'))
+        self.assertTrue(fp.matches('Makefile'))
+        self.assertTrue(fp.matches('debian/rules'))
+
+        fp.files = ['debian/*']  # Reassigning files changes what matches.
+        self.assertFalse(fp.matches('foo/bar.cc'))
+        self.assertFalse(fp.matches('Makefile'))
+        self.assertTrue(fp.matches('debian/rules'))
+
+        fp.files = ['Makefile', 'foo/*']
+        self.assertTrue(fp.matches('foo/bar.cc'))
+        self.assertTrue(fp.matches('Makefile'))
+        self.assertFalse(fp.matches('debian/rules'))
+
+    def test_create(self):
+        fp = copyright.FilesParagraph.create(
+            files=['Makefile', 'foo/*'],
+            copyright='Copyright 2014 Some Guy',
+            license=copyright.License('ISC'))
+        self.assertEqual(('Makefile', 'foo/*'), fp.files)
+        self.assertEqual('Copyright 2014 Some Guy', fp.copyright)
+        self.assertEqual(copyright.License('ISC'), fp.license)
+
+        with self.assertRaises(TypeError):  # license=None
+            copyright.FilesParagraph.create(
+                files=['*'], copyright='foo', license=None)
+
+        with self.assertRaises(TypeError):  # copyright=None
+            copyright.FilesParagraph.create(
+                files=['*'], copyright=None, license=copyright.License('ISC'))
+
+        with self.assertRaises(TypeError):  # files=None
+            copyright.FilesParagraph.create(
+                files=None, copyright='foo', license=copyright.License('ISC'))
+
 
 class HeaderTest(unittest.TestCase):
 
-- 
2.1.0




More information about the pkg-python-debian-maint mailing list