Bug#869154: [PATCH] comparators.xml: added new XML comparator

Juliana Rodrigues juliana.orod at gmail.com
Fri Jul 21 02:56:03 UTC 2017


Source: diffoscope
Version: 84
Severity: wishlist

Added XML Comparator as requested in our wishlist.
This patch closes #866120 and shows XML some love.

Signed-off-by: Juliana Rodrigues <juliana.orod at gmail.com>
---
 diffoscope/comparators/__init__.py |   1 +
 diffoscope/comparators/xml.py      | 101 +++++++++++++++++++++++++++++++++++++
 tests/comparators/test_xml.py      |  49 ++++++++++++++++++
 tests/data/test1.xml               |   9 ++++
 tests/data/test2.xml               |   9 ++++
 tests/data/test_invalid.xml        |   8 +++
 tests/data/test_xml_expected_diff  |  14 +++++
 7 files changed, 191 insertions(+)
 create mode 100644 diffoscope/comparators/xml.py
 create mode 100644 tests/comparators/test_xml.py
 create mode 100644 tests/data/test1.xml
 create mode 100644 tests/data/test2.xml
 create mode 100644 tests/data/test_invalid.xml
 create mode 100644 tests/data/test_xml_expected_diff

diff --git a/diffoscope/comparators/__init__.py b/diffoscope/comparators/__init__.py
index d22aa79..7653741 100644
--- a/diffoscope/comparators/__init__.py
+++ b/diffoscope/comparators/__init__.py
@@ -40,6 +40,7 @@ class ComparatorManager(object):
         ('ps.PsFile',),
         ('javascript.JavaScriptFile',),
         ('json.JSONFile',),
+        ('xml.XMLFile',),
         ('text.TextFile',),
         ('bzip2.Bzip2File',),
         ('cpio.CpioFile',),
diff --git a/diffoscope/comparators/xml.py b/diffoscope/comparators/xml.py
new file mode 100644
index 0000000..d46f1bc
--- /dev/null
+++ b/diffoscope/comparators/xml.py
@@ -0,0 +1,101 @@
+import re
+
+from xml.dom import minidom
+from diffoscope.difference import Difference
+from diffoscope.comparators.utils.file import File
+from xml.parsers.expat import ExpatError
+
+def _format(node):
+  """
+  Strips, in place, leading/trailing whitespace from the text nodes of a minidom.Document
+
+  Args:
+    node -- A xml.dom.minidom.Document object
+
+  Returns:
+    void
+  """
+  for n in node.childNodes:
+    if n.nodeType == minidom.Node.TEXT_NODE:
+      if n.nodeValue: n.nodeValue = n.nodeValue.strip()
+    elif n.nodeType == minidom.Node.ELEMENT_NODE:
+      _format(n)
+
+def _parse(file):
+  """
+  Formats a minidom.Document file and returns XML as string.
+
+  Args:
+    file -- An io.TextIOWrapper object
+
+  Returns:
+    str: formatted string object
+  """
+  xml = minidom.parse(file)
+  _format(xml)
+  xml.normalize()
+  return xml.toprettyxml(indent=2*' ')
+
+
+class XMLFile(File):
+  """
+  XML Files Comparison class
+
+  Attributes:
+    RE_FILE_EXTENSION (SRE_Pattern): xml file extension pattern
+  """
+  RE_FILE_EXTENSION = re.compile(r'\.xml$')
+
+  @staticmethod
+  def recognizes(file):
+    """
+    Identifies if a given file has an XML extension and parses as XML
+
+    Args:
+      file - a diffoscope.comparators.utils.file.File object
+
+    Returns:
+      False if file is not an XML file, True otherwise
+    """
+    if XMLFile.RE_FILE_EXTENSION.search(file.name) is None:
+      return False
+
+    with open(file.path) as f:
+      try:
+        file.parsed = _parse(f)
+      except ExpatError:
+        return False
+
+    return True
+
+  def compare_details(self, other, source=None):
+    """
+    Compares self.object with another, returning a Difference object
+
+    Args:
+      other  -- A XMLFile object
+      source
+
+    Returns:
+      A list containing one diffoscope.difference.Difference object
+    """
+    return [ Difference.from_text(self.dumps(self), self.dumps(other),
+      self.path, other.path)]
+
+  def dumps(self, file):
+    """
+    Opens a XMLFile and returns its parsed content
+
+    Args:
+      file -- XMLFile object
+
+    Returns:
+      str -- Formatted XML content from file
+    """
+    if file.parsed:
+      return file.parsed
+
+    with open(file.path) as f:
+      return _parse(f)
+
+
diff --git a/tests/comparators/test_xml.py b/tests/comparators/test_xml.py
new file mode 100644
index 0000000..e8e0aed
--- /dev/null
+++ b/tests/comparators/test_xml.py
@@ -0,0 +1,49 @@
+# -*- coding: utf-8 -*-
+#
+# diffoscope: in-depth comparison of files, archives, and directories
+#
+# Copyright © 2016 Chris Lamb <lamby at debian.org>
+#
+# diffoscope is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# diffoscope is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with diffoscope.  If not, see <https://www.gnu.org/licenses/>.
+
+import pytest
+
+from diffoscope.comparators.xml import XMLFile
+
+from ..utils.data import load_fixture, get_data
+from ..utils.nonexisting import assert_non_existing
+
+
+xml_a = load_fixture('test1.xml')
+xml_b = load_fixture('test2.xml')
+invalid_xml = load_fixture('test_invalid.xml')
+
+def test_identification(xml_a):
+    assert isinstance(xml_a, XMLFile)
+
+def test_invalid(invalid_xml):
+    assert not isinstance(invalid_xml, XMLFile)
+
+def test_no_differences(xml_a):
+    assert xml_a.compare(xml_a) is None
+
+@pytest.fixture
+def differences(xml_a, xml_b):
+    return xml_a.compare(xml_b).details
+
+def test_diff(differences):
+    expected_diff = get_data('test_xml_expected_diff')
+    assert differences[0].unified_diff == expected_diff
+
+
diff --git a/tests/data/test1.xml b/tests/data/test1.xml
new file mode 100644
index 0000000..b02bf09
--- /dev/null
+++ b/tests/data/test1.xml
@@ -0,0 +1,9 @@
+<note>
+        <style type="text/css" id="night-mode-pro-style" />
+        <link type="text/css" rel="stylesheet"
+        id="night-mode-pro-link" />
+        <to>Tove</to>
+        <from>Jani</from>
+        <heading>Reminder</heading>
+        <body>Don't forget me this weekend!</body>
+</note>
diff --git a/tests/data/test2.xml b/tests/data/test2.xml
new file mode 100644
index 0000000..7e892f8
--- /dev/null
+++ b/tests/data/test2.xml
@@ -0,0 +1,9 @@
+<note>
+        <style type="text/css" id="night-mode-pro-style" />
+        <link type="text/css" rel="stylesheet"
+        id="night-mode-pro-link" />
+        <to>Jani</to>
+        <from>Toni</from>
+        <heading>Re: Reminder</heading>
+        <body>Pick me up on 5!</body>
+</note>
diff --git a/tests/data/test_invalid.xml b/tests/data/test_invalid.xml
new file mode 100644
index 0000000..2a4cd51
--- /dev/null
+++ b/tests/data/test_invalid.xml
@@ -0,0 +1,8 @@
+<note>
+        style type="text/css" id="night-mode-pro-style" />
+        <link type="text/css" rel="stylesheet"
+        id="night-mode-pro-link" />
+        <to>Tove</to>
+        from>Jani</from>
+        <heading>Reminder</heading>
+        <body>Don't forget me this weekend!</body>
diff --git a/tests/data/test_xml_expected_diff b/tests/data/test_xml_expected_diff
new file mode 100644
index 0000000..0b450cb
--- /dev/null
+++ b/tests/data/test_xml_expected_diff
@@ -0,0 +1,14 @@
+@@ -1,9 +1,9 @@
+ <?xml version="1.0" ?>
+ <note>
+   <style id="night-mode-pro-style" type="text/css"/>
+   <link id="night-mode-pro-link" rel="stylesheet" type="text/css"/>
+-  <to>Tove</to>
+-  <from>Jani</from>
+-  <heading>Reminder</heading>
+-  <body>Don't forget me this weekend!</body>
++  <to>Jani</to>
++  <from>Toni</from>
++  <heading>Re: Reminder</heading>
++  <body>Pick me up on 5!</body>
+ </note>
-- 
2.13.2



More information about the Reproducible-builds mailing list