Bug#869154: [PATCH] comparators.xml: added new XML comparator
Juliana Rodrigues
juliana.orod at gmail.com
Fri Jul 21 02:56:03 UTC 2017
Source: diffoscope
Version: 84
Severity: wishlist
Added XML Comparator as requested in our wishlist.
This patch closes #866120 and shows XML some love.
Signed-off-by: Juliana Rodrigues <juliana.orod at gmail.com>
---
diffoscope/comparators/__init__.py | 1 +
diffoscope/comparators/xml.py | 101 +++++++++++++++++++++++++++++++++++++
tests/comparators/test_xml.py | 49 ++++++++++++++++++
tests/data/test1.xml | 9 ++++
tests/data/test2.xml | 9 ++++
tests/data/test_invalid.xml | 8 +++
tests/data/test_xml_expected_diff | 14 +++++
7 files changed, 191 insertions(+)
create mode 100644 diffoscope/comparators/xml.py
create mode 100644 tests/comparators/test_xml.py
create mode 100644 tests/data/test1.xml
create mode 100644 tests/data/test2.xml
create mode 100644 tests/data/test_invalid.xml
create mode 100644 tests/data/test_xml_expected_diff
diff --git a/diffoscope/comparators/__init__.py b/diffoscope/comparators/__init__.py
index d22aa79..7653741 100644
--- a/diffoscope/comparators/__init__.py
+++ b/diffoscope/comparators/__init__.py
@@ -40,6 +40,7 @@ class ComparatorManager(object):
('ps.PsFile',),
('javascript.JavaScriptFile',),
('json.JSONFile',),
+ ('xml.XMLFile',),
('text.TextFile',),
('bzip2.Bzip2File',),
('cpio.CpioFile',),
diff --git a/diffoscope/comparators/xml.py b/diffoscope/comparators/xml.py
new file mode 100644
index 0000000..d46f1bc
--- /dev/null
+++ b/diffoscope/comparators/xml.py
@@ -0,0 +1,101 @@
+import re
+
+from xml.dom import minidom
+from diffoscope.difference import Difference
+from diffoscope.comparators.utils.file import File
+from xml.parsers.expat import ExpatError
+
+def _format(node):
+ """
+ Strips, *in place*, leading/trailing whitespace from the text nodes of a minidom.Document
+
+ Args:
+ node -- A xml.dom.minidom.Document object
+
+ Returns:
+ void
+ """
+ for n in node.childNodes:
+ if n.nodeType == minidom.Node.TEXT_NODE:
+ if n.nodeValue: n.nodeValue = n.nodeValue.strip()
+ elif n.nodeType == minidom.Node.ELEMENT_NODE:
+ _format(n)
+
+def _parse(file):
+ """
+ Formats a minidom.Document file and returns XML as string.
+
+ Args:
+ file -- An io.TextIOWrapper object
+
+ Returns:
+ str: formatted string object
+ """
+ xml = minidom.parse(file)
+ _format(xml)
+ xml.normalize()
+ return xml.toprettyxml(indent=2*' ')
+
+
+class XMLFile(File):
+ """
+ XML Files Comparison class
+
+ Attributes:
+ RE_FILE_EXTENSION (SRE_Pattern): xml file extension pattern
+ """
+ RE_FILE_EXTENSION = re.compile(r'\.xml$')
+
+ @staticmethod
+ def recognizes(file):
+ """
+ Identifies if a given file has XML extension
+
+ Args:
+ file - a diffoscope.comparators.utils.file.File object
+
+ Returns:
+ False if file is not an XML file, True otherwise
+ """
+ if XMLFile.RE_FILE_EXTENSION.search(file.name) is None:
+ return False
+
+ with open(file.path) as f:
+ try:
+ file.parsed = _parse(f)
+ except ExpatError:
+ return False
+
+ return True
+
+ def compare_details(self, other, source=None):
+ """
+ Compares self.object with another, returning a Difference object
+
+ Args:
+ other -- A XMLFile object
+ source
+
+ Returns:
+ A diffoscope.difference.Difference object
+ """
+ return [ Difference.from_text(self.dumps(self), self.dumps(other),
+ self.path, other.path)]
+
+ def dumps(self, file):
+ """
+ Opens an XMLFile and returns its parsed content
+
+ Args:
+ file -- XMLFile object
+
+ Returns:
+ str -- Formatted XML content from file
+ """
+ if file.parsed:
+ return file.parsed
+
+ with open(file.path) as f:
+ return _parse(f)
+
+
diff --git a/tests/comparators/test_xml.py b/tests/comparators/test_xml.py
new file mode 100644
index 0000000..e8e0aed
--- /dev/null
+++ b/tests/comparators/test_xml.py
@@ -0,0 +1,49 @@
+# -*- coding: utf-8 -*-
+#
+# diffoscope: in-depth comparison of files, archives, and directories
+#
+# Copyright © 2016 Chris Lamb <lamby at debian.org>
+#
+# diffoscope is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# diffoscope is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with diffoscope. If not, see <https://www.gnu.org/licenses/>.
+
+import pytest
+
+from diffoscope.comparators.xml import XMLFile
+
+from ..utils.data import load_fixture, get_data
+from ..utils.nonexisting import assert_non_existing
+
+
+xml_a = load_fixture('test1.xml')
+xml_b = load_fixture('test2.xml')
+invalid_xml = load_fixture('test_invalid.xml')
+
+def test_identification(xml_a):
+ assert isinstance(xml_a, XMLFile)
+
+def test_invalid(invalid_xml):
+ assert not isinstance(invalid_xml, XMLFile)
+
+def test_no_differences(xml_a):
+ assert xml_a.compare(xml_a) is None
+
+@pytest.fixture
+def differences(xml_a, xml_b):
+ return xml_a.compare(xml_b).details
+
+def test_diff(differences):
+ expected_diff = get_data('test_xml_expected_diff')
+ assert differences[0].unified_diff == expected_diff
+
+
diff --git a/tests/data/test1.xml b/tests/data/test1.xml
new file mode 100644
index 0000000..b02bf09
--- /dev/null
+++ b/tests/data/test1.xml
@@ -0,0 +1,9 @@
+<note>
+ <style type="text/css" id="night-mode-pro-style" />
+ <link type="text/css" rel="stylesheet"
+ id="night-mode-pro-link" />
+ <to>Tove</to>
+ <from>Jani</from>
+ <heading>Reminder</heading>
+ <body>Don't forget me this weekend!</body>
+</note>
diff --git a/tests/data/test2.xml b/tests/data/test2.xml
new file mode 100644
index 0000000..7e892f8
--- /dev/null
+++ b/tests/data/test2.xml
@@ -0,0 +1,9 @@
+<note>
+ <style type="text/css" id="night-mode-pro-style" />
+ <link type="text/css" rel="stylesheet"
+ id="night-mode-pro-link" />
+ <to>Jani</to>
+ <from>Toni</from>
+ <heading>Re: Reminder</heading>
+ <body>Pick me up on 5!</body>
+</note>
diff --git a/tests/data/test_invalid.xml b/tests/data/test_invalid.xml
new file mode 100644
index 0000000..2a4cd51
--- /dev/null
+++ b/tests/data/test_invalid.xml
@@ -0,0 +1,8 @@
+<note>
+ style type="text/css" id="night-mode-pro-style" />
+ <link type="text/css" rel="stylesheet"
+ id="night-mode-pro-link" />
+ <to>Tove</to>
+ from>Jani</from>
+ <heading>Reminder</heading>
+ <body>Don't forget me this weekend!</body>
diff --git a/tests/data/test_xml_expected_diff b/tests/data/test_xml_expected_diff
new file mode 100644
index 0000000..0b450cb
--- /dev/null
+++ b/tests/data/test_xml_expected_diff
@@ -0,0 +1,14 @@
+@@ -1,9 +1,9 @@
+ <?xml version="1.0" ?>
+ <note>
+ <style id="night-mode-pro-style" type="text/css"/>
+ <link id="night-mode-pro-link" rel="stylesheet" type="text/css"/>
+- <to>Tove</to>
+- <from>Jani</from>
+- <heading>Reminder</heading>
+- <body>Don't forget me this weekend!</body>
++ <to>Jani</to>
++ <from>Toni</from>
++ <heading>Re: Reminder</heading>
++ <body>Pick me up on 5!</body>
+ </note>
--
2.13.2
More information about the Reproducible-builds
mailing list