[PATCH 09/12] copyright: Parse the rest of the paragraphs.

Sun Aug 31 21:26:15 UTC 2014

From: John Wright <jsw at google.com>

This change adds accessor methods to Copyright for the rest of the
paragraphs, and the ability to dump a parsed Copyright object back to
its original text format.
---
 lib/debian/copyright.py | 81 +++++++++++++++++++++++++++++++++++++++++++++--
 tests/test_copyright.py | 84 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 162 insertions(+), 3 deletions(-)

diff --git a/lib/debian/copyright.py b/lib/debian/copyright.py
index 7e82e0b..70e1542 100644
--- a/lib/debian/copyright.py
+++ b/lib/debian/copyright.py
@@ -72,12 +72,24 @@ class Copyright(object):
         """
         super(Copyright, self).__init__()
 
+        self.__paragraphs = []
+
         if sequence is not None:
             paragraphs = list(deb822.Deb822.iter_paragraphs(
                 sequence=sequence, encoding=encoding))
-            if len(paragraphs) > 0:
-                self.__header = Header(paragraphs[0])
-            # TODO(jsw): Parse the rest of the paragraphs.
+            if not paragraphs:
+                raise NotMachineReadableError('no paragraphs in input')
+            self.__header = Header(paragraphs[0])
+            for i in range(1, len(paragraphs)):
+                p = paragraphs[i]
+                if 'Files' in p:
+                    p = FilesParagraph(p)
+                elif 'License' in p:
+                    p = LicenseParagraph(p)
+                else:
+                    warnings.warn('Non-header paragraph has neither "Files"'
+                                  ' nor "License" fields')
+                self.__paragraphs.append(p)
         else:
             self.__header = Header()
 
@@ -92,6 +104,69 @@ class Copyright(object):
             raise TypeError('value must be a Header object')
         self.__header = hdr
 
+    def all_files_paragraphs(self):
+        """Returns an iterator over the contained FilesParagraph objects."""
+        return (p for p in self.__paragraphs if isinstance(p, FilesParagraph))
+
+    def find_files_paragraph(self, filename):
+        """Returns the FilesParagraph for the given filename.
+
+        In accordance with the spec, this method returns the last FilesParagraph
+        that matches the filename.  If no paragraphs matched, returns None.
+        """
+        result = None
+        for p in self.all_files_paragraphs():
+            if p.matches(filename):
+                result = p
+        return result
+
+    def add_files_paragraph(self, paragraph):
+        """Adds a FilesParagraph to this object.
+
+        The paragraph is inserted directly after the last FilesParagraph (which
+        might be before a standalone LicenseParagraph), or first if none exist.
+        """
+        if not isinstance(paragraph, FilesParagraph):
+            raise TypeError('paragraph must be a FilesParagraph instance')
+
+        last_i = -1
+        for i, p in enumerate(self.__paragraphs):
+            if isinstance(p, FilesParagraph):
+                last_i = i
+        self.__paragraphs.insert(last_i + 1, paragraph)
+
+    def all_license_paragraphs(self):
+        """Returns an iterator over standalone LicenseParagraph objects."""
+        return (p for p in self.__paragraphs if isinstance(p, LicenseParagraph))
+
+    def add_license_paragraph(self, paragraph):
+        """Adds a LicenseParagraph to this object.
+
+        The paragraph is inserted after any other paragraphs.
+        """
+        if not isinstance(paragraph, LicenseParagraph):
+            raise TypeError('paragraph must be a LicenseParagraph instance')
+        self.__paragraphs.append(paragraph)
+
+    def dump(self, f=None):
+        """Dumps the contents of the copyright file.
+
+        If f is None, returns a unicode object.  Otherwise, writes the contents
+        to f, which must be a file-like object that is opened in text mode
+        (i.e. that accepts unicode objects directly).  It is thus up to the
+        caller to arrange for the file to do any appropriate encoding.
+        """
+        return_string = False
+        if f is None:
+            return_string = True
+            f = io.StringIO()
+        self.header.dump(f, text_mode=True)
+        for p in self.__paragraphs:
+            f.write('\n')
+            p.dump(f, text_mode=True)
+        if return_string:
+            return f.getvalue()
+
 
 def _single_line(s):
     """Returns s if it is a single line; otherwise raises ValueError."""
diff --git a/tests/test_copyright.py b/tests/test_copyright.py
index 29c9f10..ed9cdda 100755
--- a/tests/test_copyright.py
+++ b/tests/test_copyright.py
@@ -86,6 +86,35 @@ On Debian systems, the full text of the GNU General Public
 License version 2 can be found in the file
 `/usr/share/common-licenses/GPL-2'."""
 
+MULTI_LICENSE = """\
+Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
+Upstream-Name: Project Y
+
+Files: *
+Copyright: Copyright 2000 Company A
+License: ABC
+
+Files: src/baz.*
+Copyright: Copyright 2000 Company A
+           Copyright 2001 Company B
+License: ABC
+
+License: ABC
+ [ABC TEXT]
+
+Files: debian/*
+Copyright: Copyright 2003 Debian Developer <someone at debian.org>
+License: 123
+
+Files: debian/rules
+Copyright: Copyright 2003 Debian Developer <someone at debian.org>
+           Copyright 2004 Someone Else <foo at bar.com>
+License: 123
+
+License: 123
+ [123 TEXT]
+"""
+
 FORMAT = 'http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/'
 
 
@@ -231,6 +260,61 @@ class CopyrightTest(unittest.TestCase):
         self.assertEqual('ftp://ftp.example.com/pub/games', c.header['Source'])
         self.assertIsNone(c.header.license)
 
+    def test_parse_and_dump(self):
+        c = copyright.Copyright(sequence=SIMPLE.splitlines())
+        dumped = c.dump()
+        self.assertEqual(SIMPLE, dumped)
+
+    def test_all_files_paragraphs(self):
+        c = copyright.Copyright(sequence=SIMPLE.splitlines())
+        self.assertEqual(
+            [('*',), ('debian/*',)],
+            [fp.files for fp in c.all_files_paragraphs()])
+
+        c = copyright.Copyright()
+        self.assertEqual([], list(c.all_files_paragraphs()))
+
+    def test_find_files_paragraph(self):
+        c = copyright.Copyright(sequence=SIMPLE.splitlines())
+        paragraphs = list(c.all_files_paragraphs())
+
+        self.assertIs(paragraphs[0], c.find_files_paragraph('Makefile'))
+        self.assertIs(paragraphs[0], c.find_files_paragraph('src/foo.cc'))
+        self.assertIs(paragraphs[1], c.find_files_paragraph('debian/rules'))
+        self.assertIs(paragraphs[1], c.find_files_paragraph('debian/a/b.py'))
+
+    def test_find_files_paragraph_some_unmatched(self):
+        c = copyright.Copyright()
+        files1 = copyright.FilesParagraph.create(
+            ['foo/*'], 'CompanyA', copyright.License('ISC'))
+        files2 = copyright.FilesParagraph.create(
+            ['bar/*'], 'CompanyB', copyright.License('Apache'))
+        c.add_files_paragraph(files1)
+        c.add_files_paragraph(files2)
+        self.assertIs(files1, c.find_files_paragraph('foo/bar.cc'))
+        self.assertIs(files2, c.find_files_paragraph('bar/baz.cc'))
+        self.assertIsNone(c.find_files_paragraph('baz/quux.cc'))
+        self.assertIsNone(c.find_files_paragraph('Makefile'))
+
+    def test_all_license_paragraphs(self):
+        c = copyright.Copyright(sequence=SIMPLE.splitlines())
+        self.assertEqual([], list(c.all_license_paragraphs()))
+
+        c = copyright.Copyright(MULTI_LICENSE.splitlines())
+        self.assertEqual(
+            [copyright.License('ABC', '[ABC TEXT]'),
+             copyright.License('123', '[123 TEXT]')],
+            list(p.license for p in c.all_license_paragraphs()))
+
+        c.add_license_paragraph(copyright.LicenseParagraph.create(
+            copyright.License('Foo', '[FOO TEXT]')))
+        self.assertEqual(
+            [copyright.License('ABC', '[ABC TEXT]'),
+             copyright.License('123', '[123 TEXT]'),
+             copyright.License('Foo', '[FOO TEXT]')],
+            list(p.license for p in c.all_license_paragraphs()))
+
+
 
 class MultlineTest(unittest.TestCase):
     """Test cases for format_multiline{,_lines} and parse_multline{,_as_lines}.
-- 
2.1.0