[PATCH 04/12] Add a copyright module.

John Wright jsw at debian.org
Sun Aug 31 21:26:10 UTC 2014


From: John Wright <jsw at google.com>

The new module can parse, create, and edit DEP5-formatted
debian/copyright files.

Currently it only parses the header paragraph, except for the License
field.  Follow-up changes will add support for the License field and the
rest of the paragraphs.
---
 lib/debian/copyright.py | 222 +++++++++++++++++++++++++++++++++++++++
 tests/test_copyright.py | 273 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 495 insertions(+)
 create mode 100644 lib/debian/copyright.py
 create mode 100755 tests/test_copyright.py

diff --git a/lib/debian/copyright.py b/lib/debian/copyright.py
new file mode 100644
index 0000000..8315efc
--- /dev/null
+++ b/lib/debian/copyright.py
@@ -0,0 +1,222 @@
+# vim: fileencoding=utf-8
+#
+# Copyright (C) 2014       Google, Inc.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation, either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+"""Utilities for parsing and creating machine-readable debian/copyright files.
+
+The specification for the format (also known as DEP5) is available here:
+https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
+
+TODO(jsw): Add example usage.
+"""
+
+from __future__ import unicode_literals
+
+import collections
+import re
+import string
+import warnings
+
+from debian import deb822
+
+
+_CURRENT_FORMAT = (
+    'http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/')
+
+_KNOWN_FORMATS = frozenset([
+    _CURRENT_FORMAT,
+    # TODO(jsw): Transparently rewrite https:// as http://, at least for this?
+    'https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/',
+])
+
+
+class Error(Exception):
+    """Base class for exceptions in this module."""
+
+
+class NotMachineReadableError(Error):
+    """Raised when the input is not a machine-readable debian/copyright file."""
+
+
+class Copyright(object):
+    """Represents a debian/copyright file."""
+
+    def __init__(self, sequence=None, encoding='utf-8'):
+        """Initializer.
+
+        :param sequence: Sequence of lines, e.g. a list of strings or a
+            file-like object.  If not specified, a blank Copyright object is
+            initialized.
+        :param encoding: Encoding to use, in case input is raw byte strings.
+            It is recommended to use unicode objects everywhere instead, e.g.
+            by opening files in text mode.
+
+        Raises:
+            NotMachineReadableError if 'sequence' does not contain a
+                machine-readable debian/copyright file.
+        """
+        super(Copyright, self).__init__()
+
+        if sequence is not None:
+            paragraphs = list(deb822.Deb822.iter_paragraphs(
+                sequence=sequence, encoding=encoding))
+            if len(paragraphs) > 0:
+                self.__header = Header(paragraphs[0])
+            # TODO(jsw): Parse the rest of the paragraphs.
+        else:
+            self.__header = Header()
+
+    @property
+    def header(self):
+        """The file header paragraph."""
+        return self.__header
+
+    @header.setter
+    def header(self, hdr):
+        if not isinstance(hdr, Header):
+            raise TypeError('value must be a Header object')
+        self.__header = hdr
+
+
+def _single_line(s):
+    """Returns s if it is a single line; otherwise raises ValueError."""
+    if '\n' in s:
+        raise ValueError('must be single line')
+    return s
+
+
+class _LineBased(object):
+    """Namespace for conversion methods for line-based lists as tuples."""
+    # TODO(jsw): Expose this somewhere else?  It may have more general utility.
+
+    @staticmethod
+    def from_str(s):
+        """Returns the lines in 's', with whitespace stripped, as a tuple."""
+        return tuple(v for v in
+                     (line.strip() for line in (s or '').strip().splitlines())
+                     if v)
+
+    @staticmethod
+    def to_str(seq):
+        """Returns the sequence as a string with each element on its own line.
+
+        If 'seq' has one element, the result will be on a single line.
+        Otherwise, the first line will be blank.
+        """
+        l = list(seq)
+        if not l:
+            return None
+
+        def process_and_validate(s):
+            s = s.strip()
+            if not s:
+                raise ValueError('values must not be empty')
+            if '\n' in s:
+                raise ValueError('values must not contain newlines')
+            return s
+
+        if len(l) == 1:
+            return process_and_validate(l[0])
+
+        tmp = ['']
+        for s in l:
+            tmp.append(' ' + process_and_validate(s))
+        return '\n'.join(tmp)
+
+
+class _SpaceSeparated(object):
+    """Namespace for conversion methods for space-separated lists as tuples."""
+    # TODO(jsw): Expose this somewhere else?  It may have more general utility.
+
+    _has_space = re.compile(r'\s')
+
+    @staticmethod
+    def from_str(s):
+        """Returns the values in s as a tuple (empty if only whitespace)."""
+        return tuple(v for v in (s or '').split() if v)
+
+    @classmethod
+    def to_str(cls, seq):
+        """Returns the sequence as a space-separated string (None if empty)."""
+        l = list(seq)
+        if not l:
+            return None
+        tmp = []
+        for s in l:
+            if cls._has_space.search(s):
+                raise ValueError('values must not contain whitespace')
+            s = s.strip()
+            if not s:
+                raise ValueError('values must not be empty')
+            tmp.append(s)
+        return ' '.join(tmp)
+
+
+class Header(deb822.RestrictedWrapper):
+    """Represents the header paragraph of a debian/copyright file.
+
+    Property values are all immutable, such that in order to modify them you
+    must explicitly set them (rather than modifying a returned reference).
+    """
+
+    def __init__(self, data=None):
+        """Initializer.
+
+        :param parsed: A deb822.Deb822 object for underlying data.  If None, a
+            new one will be created.
+        """
+        if data is None:
+            data = deb822.Deb822()
+            data['Format'] = _CURRENT_FORMAT
+        super(Header, self).__init__(data)
+
+        fmt = self.format
+        if fmt is None:
+            raise NotMachineReadableError(
+                'input is not a machine-readable debian/copyright')
+        if fmt not in _KNOWN_FORMATS:
+            warnings.warn('format not known: %r' % fmt)
+
+    def known_format(self):
+        """Returns True iff the format is known."""
+        return self.format in _KNOWN_FORMATS
+
+    def current_format(self):
+        """Returns True iff the format is the current format."""
+        return self.format == _CURRENT_FORMAT
+
+    format = deb822.RestrictedField(
+        'Format', to_str=_single_line, allow_none=False)
+
+    upstream_name = deb822.RestrictedField(
+        'Upstream-Name', to_str=_single_line)
+
+    upstream_contact = deb822.RestrictedField(
+        'Upstream-Contact', from_str=_LineBased.from_str,
+        to_str=_LineBased.to_str)
+
+    source = deb822.RestrictedField('Source')
+
+    disclaimer = deb822.RestrictedField('Disclaimer')
+
+    comment = deb822.RestrictedField('Comment')
+
+    # TODO(jsw): Parse this.
+    license = deb822.RestrictedField(
+        'License', to_str=lambda _: None, from_str=lambda _: None)
+
+    copyright = deb822.RestrictedField('Copyright')
diff --git a/tests/test_copyright.py b/tests/test_copyright.py
new file mode 100755
index 0000000..129d57e
--- /dev/null
+++ b/tests/test_copyright.py
@@ -0,0 +1,273 @@
+#! /usr/bin/python
+## vim: fileencoding=utf-8
+
+# Copyright (C) 2014 Google, Inc.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation, either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+from __future__ import unicode_literals
+
+import sys
+import unittest
+
+sys.path.insert(0, '../lib/')
+
+from debian import copyright
+from debian import deb822
+
+
+SIMPLE = """\
+Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
+Upstream-Name: X Solitaire
+Source: ftp://ftp.example.com/pub/games
+
+Files: *
+Copyright: Copyright 1998 John Doe <jdoe at example.com>
+License: GPL-2+
+ This program is free software; you can redistribute it
+ and/or modify it under the terms of the GNU General Public
+ License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later
+ version.
+ .
+ This program is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied
+ warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ PURPOSE.  See the GNU General Public License for more
+ details.
+ .
+ You should have received a copy of the GNU General Public
+ License along with this package; if not, write to the Free
+ Software Foundation, Inc., 51 Franklin St, Fifth Floor,
+ Boston, MA  02110-1301 USA
+ .
+ On Debian systems, the full text of the GNU General Public
+ License version 2 can be found in the file
+ `/usr/share/common-licenses/GPL-2'.
+
+Files: debian/*
+Copyright: Copyright 1998 Jane Smith <jsmith at example.net>
+License: GPL-2+
+ [LICENSE TEXT]
+"""
+
+FORMAT = 'http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/'
+
+
+class LineBasedTest(unittest.TestCase):
+    """Test for _LineBased.{to,from}_str"""
+
+    def setUp(self):
+        # Alias for less typing.
+        self.lb = copyright._LineBased
+
+    def test_from_str_none(self):
+        self.assertEqual((), self.lb.from_str(None))
+
+    def test_from_str_empty(self):
+        self.assertEqual((), self.lb.from_str(''))
+
+    def test_from_str_single_line(self):
+        self.assertEqual(
+            ('Foo Bar <foo at bar.com>',),
+            self.lb.from_str('Foo Bar <foo at bar.com>'))
+
+    def test_from_str_single_value_after_newline(self):
+        self.assertEqual(
+            ('Foo Bar <foo at bar.com>',),
+            self.lb.from_str('\n Foo Bar <foo at bar.com>'))
+
+    def test_from_str_multiline(self):
+        self.assertEqual(
+            ('Foo Bar <foo at bar.com>', 'http://bar.com/foo'),
+            self.lb.from_str('\n Foo Bar <foo at bar.com>\n http://bar.com/foo'))
+
+    def test_to_str_empty(self):
+        self.assertIsNone(self.lb.to_str([]))
+        self.assertIsNone(self.lb.to_str(()))
+
+    def test_to_str_single(self):
+        self.assertEqual(
+            'Foo Bar <foo at bar.com>',
+            self.lb.to_str(['Foo Bar <foo at bar.com>']))
+
+    def test_to_str_multi_list(self):
+        self.assertEqual(
+            '\n Foo Bar <foo at bar.com>\n http://bar.com/foo',
+            self.lb.to_str(
+                ['Foo Bar <foo at bar.com>', 'http://bar.com/foo']))
+
+    def test_to_str_multi_tuple(self):
+        self.assertEqual(
+            '\n Foo Bar <foo at bar.com>\n http://bar.com/foo',
+            self.lb.to_str(
+                ('Foo Bar <foo at bar.com>', 'http://bar.com/foo')))
+
+    def test_to_str_empty_value(self):
+        with self.assertRaises(ValueError) as cm:
+            self.lb.to_str(['foo', '', 'bar'])
+        self.assertEqual(('values must not be empty',), cm.exception.args)
+
+    def test_to_str_whitespace_only_value(self):
+        with self.assertRaises(ValueError) as cm:
+            self.lb.to_str(['foo', ' \t', 'bar'])
+        self.assertEqual(('values must not be empty',), cm.exception.args)
+
+    def test_to_str_elements_stripped(self):
+        self.assertEqual(
+            '\n Foo Bar <foo at bar.com>\n http://bar.com/foo',
+            self.lb.to_str(
+                (' Foo Bar <foo at bar.com>\t', ' http://bar.com/foo  ')))
+
+    def test_to_str_newlines_single(self):
+        with self.assertRaises(ValueError) as cm:
+            self.lb.to_str([' Foo Bar <foo at bar.com>\n http://bar.com/foo  '])
+        self.assertEqual(
+            ('values must not contain newlines',), cm.exception.args)
+
+    def test_to_str_newlines_multi(self):
+        with self.assertRaises(ValueError) as cm:
+            self.lb.to_str(
+                ['bar', ' Foo Bar <foo at bar.com>\n http://bar.com/foo  '])
+        self.assertEqual(
+            ('values must not contain newlines',), cm.exception.args)
+
+
+class SpaceSeparatedTest(unittest.TestCase):
+    """Tests for _SpaceSeparated.{to,from}_str."""
+
+    def setUp(self):
+        # Alias for less typing.
+        self.ss = copyright._SpaceSeparated
+
+    def test_from_str_none(self):
+        self.assertEqual((), self.ss.from_str(None))
+
+    def test_from_str_empty(self):
+        self.assertEqual((), self.ss.from_str(' '))
+        self.assertEqual((), self.ss.from_str(''))
+
+    def test_from_str_single(self):
+        self.assertEqual(('foo',), self.ss.from_str('foo'))
+        self.assertEqual(('bar',), self.ss.from_str(' bar '))
+
+    def test_from_str_multi(self):
+        self.assertEqual(('foo', 'bar', 'baz'), self.ss.from_str('foo bar baz'))
+        self.assertEqual(
+            ('bar', 'baz', 'quux'), self.ss.from_str(' bar baz quux \t '))
+
+    def test_to_str_empty(self):
+        self.assertIsNone(self.ss.to_str([]))
+        self.assertIsNone(self.ss.to_str(()))
+
+    def test_to_str_single(self):
+        self.assertEqual('foo', self.ss.to_str(['foo']))
+
+    def test_to_str_multi(self):
+        self.assertEqual('foo bar baz', self.ss.to_str(['foo', 'bar', 'baz']))
+
+    def test_to_str_empty_value(self):
+        with self.assertRaises(ValueError) as cm:
+            self.ss.to_str(['foo', '', 'bar'])
+        self.assertEqual(('values must not be empty',), cm.exception.args)
+
+    def test_to_str_value_has_space_single(self):
+        with self.assertRaises(ValueError) as cm:
+            self.ss.to_str([' baz quux '])
+        self.assertEqual(
+            ('values must not contain whitespace',), cm.exception.args)
+
+    def test_to_str_value_has_space_multi(self):
+        with self.assertRaises(ValueError) as cm:
+            self.ss.to_str(['foo', ' baz quux '])
+        self.assertEqual(
+            ('values must not contain whitespace',), cm.exception.args)
+
+
+class CopyrightTest(unittest.TestCase):
+
+    def test_basic_parse_success(self):
+        c = copyright.Copyright(sequence=SIMPLE.splitlines())
+        self.assertEqual(FORMAT, c.header.format)
+        self.assertEqual(FORMAT, c.header['Format'])
+        self.assertEqual('X Solitaire', c.header.upstream_name)
+        self.assertEqual('X Solitaire', c.header['Upstream-Name'])
+        self.assertEqual('ftp://ftp.example.com/pub/games', c.header.source)
+        self.assertEqual('ftp://ftp.example.com/pub/games', c.header['Source'])
+        self.assertIsNone(c.header.license)
+
+
+class HeaderTest(unittest.TestCase):
+
+    def test_format_not_none(self):
+        h = copyright.Header()
+        self.assertEqual(FORMAT, h.format)
+        with self.assertRaises(TypeError) as cm:
+            h.format = None
+        self.assertEqual(('value must not be None',), cm.exception.args)
+
+    def test_upstream_name_single_line(self):
+        h = copyright.Header()
+        h.upstream_name = 'Foo Bar'
+        self.assertEqual('Foo Bar', h.upstream_name)
+        with self.assertRaises(ValueError) as cm:
+            h.upstream_name = 'Foo Bar\n Baz'
+        self.assertEqual(('must be single line',), cm.exception.args)
+
+    def test_upstream_contact_single_read(self):
+        data = deb822.Deb822()
+        data['Format'] = FORMAT
+        data['Upstream-Contact'] = 'Foo Bar <foo at bar.com>'
+        h = copyright.Header(data=data)
+        self.assertEqual(('Foo Bar <foo at bar.com>',), h.upstream_contact)
+
+    def test_upstream_contact_multi1_read(self):
+        data = deb822.Deb822()
+        data['Format'] = FORMAT
+        data['Upstream-Contact'] = 'Foo Bar <foo at bar.com>\n http://bar.com/foo'
+        h = copyright.Header(data=data)
+        self.assertEqual(
+            ('Foo Bar <foo at bar.com>', 'http://bar.com/foo'),
+            h.upstream_contact)
+
+    def test_upstream_contact_multi2_read(self):
+        data = deb822.Deb822()
+        data['Format'] = FORMAT
+        data['Upstream-Contact'] = (
+            '\n Foo Bar <foo at bar.com>\n http://bar.com/foo')
+        h = copyright.Header(data=data)
+        self.assertEqual(
+            ('Foo Bar <foo at bar.com>', 'http://bar.com/foo'),
+            h.upstream_contact)
+
+    def test_upstream_contact_single_write(self):
+        h = copyright.Header()
+        h.upstream_contact = ['Foo Bar <foo at bar.com>']
+        self.assertEqual(('Foo Bar <foo at bar.com>',), h.upstream_contact)
+        self.assertEqual('Foo Bar <foo at bar.com>', h['Upstream-Contact'])
+
+    def test_upstream_contact_multi_write(self):
+        h = copyright.Header()
+        h.upstream_contact = ['Foo Bar <foo at bar.com>', 'http://bar.com/foo']
+        self.assertEqual(
+            ('Foo Bar <foo at bar.com>', 'http://bar.com/foo'),
+            h.upstream_contact)
+        self.assertEqual(
+            '\n Foo Bar <foo at bar.com>\n http://bar.com/foo',
+            h['upstream-contact'])
+
+
+if __name__ == '__main__':
+    unittest.main()
-- 
2.1.0




More information about the pkg-python-debian-maint mailing list