[PATCH 07/12] copyright: Add a FilesParagraph class.
John Wright
jsw at debian.org
Sun Aug 31 21:26:13 UTC 2014
From: John Wright <jsw at google.com>
A FilesParagraph matches a set of files via a list of shell-like globs,
and keeps track of their copyright and license information.
---
lib/debian/copyright.py | 115 +++++++++++++++++++++++++++++-
tests/test_copyright.py | 183 ++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 297 insertions(+), 1 deletion(-)
diff --git a/lib/debian/copyright.py b/lib/debian/copyright.py
index 7c62167..7e82e0b 100644
--- a/lib/debian/copyright.py
+++ b/lib/debian/copyright.py
@@ -28,8 +28,8 @@ from __future__ import unicode_literals
import collections
import itertools
+import io
import re
-import string
import warnings
from debian import deb822
@@ -257,6 +257,119 @@ class License(collections.namedtuple('License', 'synopsis text')):
# TODO(jsw): Provide methods to look up license text for known licenses?
def _glob_to_re_fragment(glob):
    r"""Translates a single glob into an unanchored regex fragment.

    ``*`` becomes ``.*`` and ``?`` becomes ``.``.  The escapes ``\*``, ``\?``
    and ``\\`` become the corresponding escaped literal character.  Every
    other character is passed through re.escape.

    Raises ValueError on a trailing backslash or an unsupported escape.
    """
    buf = io.StringIO()
    pos = 0
    end = len(glob)
    while pos < end:
        char = glob[pos]
        pos += 1
        if char == '*':
            buf.write('.*')
        elif char == '?':
            buf.write('.')
        elif char == '\\':
            # A backslash must be followed by one of the escapable characters.
            if pos >= end:
                raise ValueError('single backslash not allowed at end')
            char = glob[pos]
            pos += 1
            if char not in '\\?*':
                raise ValueError(r'invalid escape sequence: \%s' % char)
            buf.write(re.escape(char))
        else:
            buf.write(re.escape(char))
    return buf.getvalue()


def globs_to_re(globs):
    r"""Returns an re object for the given globs.

    Only * and ? wildcards are supported.  Literal * and ? may be matched via
    \* and \?, respectively.  A literal backslash is matched via \\.  Any
    other character after a backslash is forbidden.

    The returned pattern is intended to be used through its match() method:
    a filename matches iff at least one of the globs matches the *whole*
    filename.

    Empty globs match nothing; if no non-empty glob is given, the returned
    pattern never matches.

    :param globs: An iterable of glob strings.

    Raises ValueError if any of the globs is illegal.
    """
    # Empty globs are skipped: an empty alternative would otherwise match at
    # the start of every filename.
    fragments = [_glob_to_re_fragment(glob) for glob in globs if glob]

    # '(?!)' is a negative lookahead for the empty string, which always fails.
    alternatives = '|'.join(fragments) if fragments else '(?!)'

    # The alternatives are grouped so that the end anchor applies to every
    # glob, not only to the last one ('a|b\Z' would accept 'a.orig').  We use
    # \Z instead of $ so that this works correctly for filenames including \n.
    # DOTALL lets the wildcards match \n as well; MULTILINE only affects ^ and
    # $, which this pattern never emits, and is kept so that the flags of the
    # returned pattern stay unchanged.
    return re.compile(
        r'(?:%s)\Z' % alternatives, re.MULTILINE | re.DOTALL)
+
+
class FilesParagraph(deb822.RestrictedWrapper):
    """A Files paragraph of a debian/copyright file.

    Such a paragraph states the copyright and license that apply to the set
    of files selected by the globs in its Files field.
    """

    def __init__(self, data, _internal_validate=True):
        """Wraps data as a Files paragraph.

        :param data: The underlying paragraph data.
        :param _internal_validate: When true, a missing Files field raises
            ValueError, and a missing Copyright or License field, or an
            empty Files value, only triggers a warning.
        """
        super(FilesParagraph, self).__init__(data)

        if _internal_validate:
            if 'Files' not in data:
                raise ValueError('"Files" field required')

            # The remaining "required" fields are only warned about for now.
            # Perhaps these should be upgraded to exceptions (potentially
            # protected by a "strict" param).
            soft_required = (
                ('Copyright', 'Files paragraph missing Copyright field'),
                ('License', 'Files paragraph missing License field'),
            )
            for field_name, message in soft_required:
                if field_name not in data:
                    warnings.warn(message)

            if not self.files:
                warnings.warn('Files paragraph has empty Files field')

        # Pair of (raw Files value the pattern was built from, pattern).
        self.__cached_files_pat = (None, None)

    @classmethod
    def create(cls, files, copyright, license):
        """Builds a new FilesParagraph from its required parts.

        The paragraph starts out empty (validation is skipped) and the three
        fields are then assigned in order.

        :param files: The list of file globs.
        :param copyright: The copyright for the files (free-form text).
        :param license: The License for the files.
        """
        paragraph = cls(deb822.Deb822(), _internal_validate=False)
        paragraph.files = files
        paragraph.copyright = copyright
        paragraph.license = license
        return paragraph

    def files_pattern(self):
        """Returns a regular expression equivalent to the Files globs.

        The compiled pattern is cached together with the raw Files value it
        was built from, and rebuilt only when that value has changed.

        Raises ValueError if any of the globs are invalid.
        """
        raw_files = self['files']
        cached_for, pattern = self.__cached_files_pat
        if cached_for != raw_files:
            pattern = globs_to_re(self.files)
            self.__cached_files_pat = (raw_files, pattern)
        return pattern

    def matches(self, filename):
        """Returns True iff filename is matched by a glob in Files."""
        return self.files_pattern().match(filename) is not None

    # Field accessors.  Files, Copyright and License are declared with
    # allow_none=False; Comment uses the RestrictedField defaults.
    files = deb822.RestrictedField(
        'Files',
        from_str=_SpaceSeparated.from_str,
        to_str=_SpaceSeparated.to_str,
        allow_none=False)

    copyright = deb822.RestrictedField('Copyright', allow_none=False)

    license = deb822.RestrictedField(
        'License',
        from_str=License.from_str,
        to_str=License.to_str,
        allow_none=False)

    comment = deb822.RestrictedField('Comment')
+
+
class LicenseParagraph(deb822.RestrictedWrapper):
"""Represents a standalone license paragraph of a debian/copyright file.
diff --git a/tests/test_copyright.py b/tests/test_copyright.py
index 2632db5..29c9f10 100755
--- a/tests/test_copyright.py
+++ b/tests/test_copyright.py
@@ -19,6 +19,7 @@
from __future__ import unicode_literals
+import re
import sys
import unittest
@@ -388,6 +389,188 @@ class LicenseParagraphTest(unittest.TestCase):
with self.assertRaises(deb822.RestrictedFieldError):
lp['Files'] = 'foo/*'
class GlobsToReTest(unittest.TestCase):
    """Behavioural tests for copyright.globs_to_re.

    These tests check which filenames a compiled pattern accepts or rejects
    through match().  They deliberately do not compare the source text of the
    regular expression, which is an implementation detail of globs_to_re.
    """

    def _check(self, globs, matching=(), non_matching=()):
        """Compiles globs and checks the given filenames against the result.

        :param globs: The list of globs handed to globs_to_re.
        :param matching: Filenames the pattern must accept.
        :param non_matching: Filenames the pattern must reject.
        """
        pat = copyright.globs_to_re(globs)
        for filename in matching:
            self.assertTrue(
                pat.match(filename),
                '%r should match %r' % (globs, filename))
        for filename in non_matching:
            self.assertFalse(
                pat.match(filename),
                '%r should not match %r' % (globs, filename))

    def test_empty(self):
        self._check([], non_matching=['foo', 'foo/bar'])

    def test_star(self):
        self._check(['*'], matching=['foo', 'foo/bar/baz'])

    def test_star_prefix(self):
        self._check(
            ['*.in'],
            matching=['Makefile.in', 'foo/bar/Makefile.in'],
            non_matching=['foo', 'in', 'foo/bar/in'])

    def test_star_prefix_with_slash(self):
        self._check(
            ['*/Makefile.in'],
            matching=['foo/Makefile.in', 'foo/bar/Makefile.in'],
            non_matching=['foo', 'in', 'foo/bar/in'])

    def test_question_mark(self):
        self._check(
            ['foo/messages.??_??.txt'],
            matching=['foo/messages.en_US.txt', 'foo/messages.ja_JP.txt'],
            non_matching=['messages.en_US.txt', 'foo/messages_ja_JP.txt'])

    def test_multi_literal(self):
        self._check(
            ['Makefile.in', 'foo/bar'],
            matching=['Makefile.in', 'foo/bar'],
            non_matching=[
                'foo/Makefile.in', 'foo/barbaz', 'foo/bar/baz', 'a/foo/bar'])

    def test_multi_wildcard(self):
        self._check(
            ['debian/*', '*.Debian', 'translations/fr_??/*'],
            matching=[
                'debian/rules',
                'README.Debian',
                'foo/bar/README.Debian',
                'translations/fr_FR/a.txt',
                'translations/fr_BE/a.txt',
            ],
            non_matching=['other/debian/rules', 'translations/en_US/a.txt'])

    def test_literal_backslash(self):
        self._check(
            [r'foo/bar\\baz.c', r'bar/quux\\'],
            matching=[r'foo/bar\baz.c', 'bar/quux\\'],
            non_matching=['foo/bar.baz.c', 'foo/bar/baz.c', 'bar/quux'])

    def test_newline_in_filename(self):
        # Wildcards span newlines, and the end anchor does not tolerate a
        # trailing newline the way $ would.
        self._check(
            ['foo/*'], matching=['foo/a\nb'], non_matching=['bar/a\nb'])
        self._check(['foo'], matching=['foo'], non_matching=['foo\n'])

    def test_illegal_backslash(self):
        with self.assertRaises(ValueError) as cm:
            copyright.globs_to_re([r'foo/a\b.c'])
        self.assertEqual((r'invalid escape sequence: \b',),
                         cm.exception.args)

        # globs_to_re takes a list of globs; a bare string would be iterated
        # character by character.
        with self.assertRaises(ValueError) as cm:
            copyright.globs_to_re(['foo/bar\\'])
        self.assertEqual(('single backslash not allowed at end',),
                         cm.exception.args)
+
+
class FilesParagraphTest(unittest.TestCase):
    """Tests for copyright.FilesParagraph."""

    def setUp(self):
        # A minimal paragraph carrying the Files, Copyright and License fields.
        proto = deb822.Deb822()
        proto['Files'] = '*'
        proto['Copyright'] = 'Foo'
        proto['License'] = 'ISC'
        self.prototype = proto

    def _assert_matches(self, fp, expectations):
        """Checks fp.matches() for each (filename, expected) pair in order."""
        for filename, expected in expectations:
            check = self.assertTrue if expected else self.assertFalse
            check(fp.matches(filename))

    def test_files_property(self):
        fp = copyright.FilesParagraph(self.prototype)
        self.assertEqual(('*',), fp.files)

        # Assigning a list updates both the parsed and the raw view.
        assignments = (
            (['debian/*'], ('debian/*',), 'debian/*'),
            (['src/foo/*', 'src/bar/*'],
             ('src/foo/*', 'src/bar/*'),
             'src/foo/* src/bar/*'),
        )
        for new_value, parsed, raw in assignments:
            fp.files = new_value
            self.assertEqual(parsed, fp.files)
            self.assertEqual(raw, fp['files'])

        with self.assertRaises(TypeError):
            fp.files = None

        # Tabs, newlines and spaces all separate globs.
        self.prototype['Files'] = 'foo/*\tbar/*\n\tbaz/*\n quux/*'
        reparsed = copyright.FilesParagraph(self.prototype)
        self.assertEqual(
            ('foo/*', 'bar/*', 'baz/*', 'quux/*'), reparsed.files)

    def test_license_property(self):
        fp = copyright.FilesParagraph(self.prototype)
        self.assertEqual(copyright.License('ISC'), fp.license)

        with_text = copyright.License('ISC', '[LICENSE TEXT]')
        fp.license = with_text
        self.assertEqual(
            copyright.License('ISC', '[LICENSE TEXT]'), fp.license)
        self.assertEqual('ISC\n [LICENSE TEXT]', fp['license'])

        with self.assertRaises(TypeError):
            fp.license = None

    def test_matches(self):
        fp = copyright.FilesParagraph(self.prototype)
        self._assert_matches(fp, (
            ('foo/bar.cc', True),
            ('Makefile', True),
            ('debian/rules', True),
        ))

        fp.files = ['debian/*']
        self._assert_matches(fp, (
            ('foo/bar.cc', False),
            ('Makefile', False),
            ('debian/rules', True),
        ))

        fp.files = ['Makefile', 'foo/*']
        self._assert_matches(fp, (
            ('foo/bar.cc', True),
            ('Makefile', True),
            ('debian/rules', False),
        ))

    def test_create(self):
        fp = copyright.FilesParagraph.create(
            files=['Makefile', 'foo/*'],
            copyright='Copyright 2014 Some Guy',
            license=copyright.License('ISC'))
        self.assertEqual(('Makefile', 'foo/*'), fp.files)
        self.assertEqual('Copyright 2014 Some Guy', fp.copyright)
        self.assertEqual(copyright.License('ISC'), fp.license)

        # Each of the three parts is rejected when it is None.
        isc = copyright.License('ISC')
        invalid_calls = (
            dict(files=['*'], copyright='foo', license=None),
            dict(files=['*'], copyright=None, license=isc),
            dict(files=None, copyright='foo', license=isc),
        )
        for kwargs in invalid_calls:
            with self.assertRaises(TypeError):
                copyright.FilesParagraph.create(**kwargs)
+
class HeaderTest(unittest.TestCase):
--
2.1.0
More information about the pkg-python-debian-maint
mailing list