[Pkg-privacy-commits] [Git][pkg-privacy-team/mat2][upstream] New upstream version 0.12.4

Georg Faerber (@georg) georg at debian.org
Sat May 7 13:20:17 BST 2022



Georg Faerber pushed to branch upstream at Privacy Maintainers / mat2


Commits:
450ecda2 by Georg Faerber at 2022-05-07T12:12:25+00:00
New upstream version 0.12.4
- - - - -


11 changed files:

- .gitlab-ci.yml
- CHANGELOG.md
- README.md
- doc/mat2.1
- libmat2/images.py
- libmat2/pdf.py
- libmat2/web.py
- mat2
- setup.py
- tests/test_climat2.py
- tests/test_libmat2.py


Changes:

=====================================
.gitlab-ci.yml
=====================================
@@ -1,3 +1,6 @@
+include:
+  - template: Security/SAST.gitlab-ci.yml
+
 variables:
   CONTAINER_REGISTRY: $CI_REGISTRY/georg/mat2-ci-images
 


=====================================
CHANGELOG.md
=====================================
@@ -1,3 +1,12 @@
+# 0.12.4 - 2022-04-30
+
+- Fix possible errors/crashes when processing multiple files
+  via the command line interface
+- Use a fixed PDF version for the output
+- Improve compatibility with modern versions of rsvg
+- Improve the robustness of the command line interface with
+  regard to control characters
+
 # 0.12.3 - 2022-01-06
 
 - Implement code for internationalization


=====================================
README.md
=====================================
@@ -136,6 +136,8 @@ of the guarantee that mat2 won't modify the data of their files, there is the
 	watermarks from PDF.
 - [Scrambled Exif](https://f-droid.org/packages/com.jarsilio.android.scrambledeggsif/),
 	an open-source Android application to remove metadata from pictures.
+- [Dangerzone](https://dangerzone.rocks/), designed to sanitize harmful documents
+  into harmless ones.
 
 # Contact
 


=====================================
doc/mat2.1
=====================================
@@ -1,4 +1,4 @@
-.TH mat2 "1" "January 2022" "mat2 0.12.3" "User Commands"
+.TH mat2 "1" "April 2022" "mat2 0.12.4" "User Commands"
 
 .SH NAME
 mat2 \- the metadata anonymisation toolkit 2


=====================================
libmat2/images.py
=====================================
@@ -30,12 +30,23 @@ class SVGParser(exiftool.ExiftoolParser):
             svg = Rsvg.Handle.new_from_file(self.filename)
         except GLib.GError:
             raise ValueError
-        dimensions = svg.get_dimensions()
-        surface = cairo.SVGSurface(self.output_filename,
-                                   dimensions.height,
-                                   dimensions.width)
+
+        try:
+            _, _, _, _, has_viewbox, viewbox = svg.get_intrinsic_dimensions()
+            if has_viewbox is False:
+                raise ValueError
+            _, width, height = svg.get_intrinsic_size_in_pixels()
+        except AttributeError:
+            dimensions = svg.get_dimensions()
+            height, width = dimensions.height, dimensions.width
+
+        surface = cairo.SVGSurface(self.output_filename, height, width)
         context = cairo.Context(surface)
-        svg.render_cairo(context)
+        try:
+            svg.render_document(context, viewbox)
+        except AttributeError:
+            svg.render_cairo(context)
+
         surface.finish()
         return True
 


=====================================
libmat2/pdf.py
=====================================
@@ -22,6 +22,7 @@ if LooseVersion(poppler_version) < LooseVersion('0.46'):  # pragma: no cover
     raise ValueError("mat2 needs at least Poppler version 0.46 to work. \
 The installed version is %s." % poppler_version)  # pragma: no cover
 
+FIXED_PDF_VERSION = cairo.PDFVersion.VERSION_1_5
 
 class PDFParser(abstract.AbstractParser):
     mimetypes = {'application/pdf', }
@@ -52,6 +53,7 @@ class PDFParser(abstract.AbstractParser):
 
         tmp_path = tempfile.mkstemp()[1]
         pdf_surface = cairo.PDFSurface(tmp_path, 10, 10)  # resized later anyway
+        pdf_surface.restrict_to_version(FIXED_PDF_VERSION)
         pdf_context = cairo.Context(pdf_surface)  # context draws on the surface
 
         for pagenum in range(pages_count):
@@ -80,6 +82,7 @@ class PDFParser(abstract.AbstractParser):
 
         _, tmp_path = tempfile.mkstemp()
         pdf_surface = cairo.PDFSurface(tmp_path, 32, 32)  # resized later anyway
+        pdf_surface.restrict_to_version(FIXED_PDF_VERSION)
         pdf_context = cairo.Context(pdf_surface)
 
         for pagenum in range(pages_count):


=====================================
libmat2/web.py
=====================================
@@ -104,6 +104,7 @@ class _HTMLParser(parser.HTMLParser):
         self.tag_required_blocklist = required_blocklisted_tags
         self.tag_blocklist = blocklisted_tags
 
+    # pylint: disable=R0201
     def error(self, message):  # pragma: no cover
         """ Amusingly, Python's documentation doesn't mention that this
         function needs to be implemented in subclasses of the parent class


=====================================
mat2
=====================================
@@ -17,7 +17,7 @@ except ValueError as ex:
     print(ex)
     sys.exit(1)
 
-__version__ = '0.12.3'
+__version__ = '0.12.4'
 
 # Make pyflakes happy
 assert Set
@@ -26,13 +26,19 @@ assert Union
 
 logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.WARNING)
 
+def __print_without_chars(s: str):
+    """ Remove control characters
+    We might use 'Cc' instead of 'C', but better safe than sorry
+    https://www.unicode.org/reports/tr44/#GC_Values_Table
+    """
+    print(''.join(ch for ch in s if not unicodedata.category(ch).startswith('C')))
 
 def __check_file(filename: str, mode: int = os.R_OK) -> bool:
     if not os.path.exists(filename):
-        print("[-] %s doesn't exist." % filename)
+        __print_without_chars("[-] %s doesn't exist." % filename)
         return False
     elif not os.path.isfile(filename):
-        print("[-] %s is not a regular file." % filename)
+        __print_without_chars("[-] %s is not a regular file." % filename)
         return False
     elif not os.access(filename, mode):
         mode_str = []  # type: List[str]
@@ -40,7 +46,7 @@ def __check_file(filename: str, mode: int = os.R_OK) -> bool:
             mode_str += 'readable'
         if mode & os.W_OK:
             mode_str += 'writeable'
-        print("[-] %s is not %s." % (filename, 'nor '.join(mode_str)))
+        __print_without_chars("[-] %s is not %s." % (filename, 'nor '.join(mode_str)))
         return False
     return True
 
@@ -88,10 +94,10 @@ def show_meta(filename: str, sandbox: bool):
     try:
         p, mtype = parser_factory.get_parser(filename)  # type: ignore
     except ValueError as e:
-        print("[-] something went wrong when processing %s: %s" % (filename, e))
+        __print_without_chars("[-] something went wrong when processing %s: %s" % (filename, e))
         return
     if p is None:
-        print("[-] %s's format (%s) is not supported" % (filename, mtype))
+        __print_without_chars("[-] %s's format (%s) is not supported" % (filename, mtype))
         return
     p.sandbox = sandbox
     __print_meta(filename, p.get_meta())
@@ -100,28 +106,22 @@ def show_meta(filename: str, sandbox: bool):
 def __print_meta(filename: str, metadata: dict, depth: int = 1):
     padding = " " * depth*2
     if not metadata:
-        print(padding + "No metadata found in %s." % filename)
+        __print_without_chars(padding + "No metadata found in %s." % filename)
         return
 
-    print("[%s] Metadata for %s:" % ('+'*depth, filename))
+    __print_without_chars("[%s] Metadata for %s:" % ('+'*depth, filename))
 
     for (k, v) in sorted(metadata.items()):
         if isinstance(v, dict):
             __print_meta(k, v, depth+1)
             continue
 
-        # Remove control characters
-        # We might use 'Cc' instead of 'C', but better safe than sorry
-        # https://www.unicode.org/reports/tr44/#GC_Values_Table
-        try:
-            v = ''.join(ch for ch in v if not unicodedata.category(ch).startswith('C'))
-        except TypeError:
-            pass  # for things that aren't iterable
-
         try:  # FIXME this is ugly.
-            print(padding + "  %s: %s" % (k, v))
+            __print_without_chars(padding + "  %s: %s" % (k, v))
         except UnicodeEncodeError:
-            print(padding + "  %s: harmful content" % k)
+            __print_without_chars(padding + "  %s: harmful content" % k)
+        except TypeError:
+            pass  # for things that aren't iterable
 
 
 def clean_meta(filename: str, is_lightweight: bool, inplace: bool, sandbox: bool,
@@ -133,10 +133,10 @@ def clean_meta(filename: str, is_lightweight: bool, inplace: bool, sandbox: bool
     try:
         p, mtype = parser_factory.get_parser(filename)  # type: ignore
     except ValueError as e:
-        print("[-] something went wrong when cleaning %s: %s" % (filename, e))
+        __print_without_chars("[-] something went wrong when cleaning %s: %s" % (filename, e))
         return False
     if p is None:
-        print("[-] %s's format (%s) is not supported" % (filename, mtype))
+        __print_without_chars("[-] %s's format (%s) is not supported" % (filename, mtype))
         return False
     p.unknown_member_policy = policy
     p.lightweight_cleaning = is_lightweight
@@ -151,7 +151,7 @@ def clean_meta(filename: str, is_lightweight: bool, inplace: bool, sandbox: bool
                 os.rename(p.output_filename, filename)
         return ret
     except RuntimeError as e:
-        print("[-] %s can't be cleaned: %s" % (filename, e))
+        __print_without_chars("[-] %s can't be cleaned: %s" % (filename, e))
     return False
 
 
@@ -169,7 +169,7 @@ def show_parsers():
                 # mimetype, so there is not point in showing the mimetype at all
                 continue
             formats.add('  - %s (%s)' % (mtype, ', '.join(extensions)))
-    print('\n'.join(sorted(formats)))
+    __print_without_chars('\n'.join(sorted(formats)))
 
 
 def __get_files_recursively(files: List[str]) -> List[str]:
@@ -198,9 +198,9 @@ def main() -> int:
             show_parsers()
             return 0
         elif args.check_dependencies:
-            print("Dependencies for mat2 %s:" % __version__)
+            __print_without_chars("Dependencies for mat2 %s:" % __version__)
             for key, value in sorted(check_dependencies().items()):
-                print('- %s: %s %s' % (key, 'yes' if value['found'] else 'no',
+                __print_without_chars('- %s: %s %s' % (key, 'yes' if value['found'] else 'no',
                                        '(optional)' if not value['required'] else ''))
         else:
             arg_parser.print_help()
@@ -221,14 +221,14 @@ def main() -> int:
         files = __get_files_recursively(args.files)
         # We have to use Processes instead of Threads, since
         # we're using tempfile.mkdtemp, which isn't thread-safe.
+        futures = list()
         with concurrent.futures.ProcessPoolExecutor() as executor:
-            futures = list()
             for f in files:
                 future = executor.submit(clean_meta, f, args.lightweight,
                                          inplace, args.sandbox, policy)
                 futures.append(future)
-            for future in concurrent.futures.as_completed(futures):
-                no_failure &= future.result()
+        for future in concurrent.futures.as_completed(futures):
+            no_failure &= future.result()
         return 0 if no_failure is True else -1
 
 


=====================================
setup.py
=====================================
@@ -5,7 +5,7 @@ with open("README.md", encoding='utf-8') as fh:
 
 setuptools.setup(
     name="mat2",
-    version='0.12.3',
+    version='0.12.4',
     author="Julien (jvoisin) Voisin",
     author_email="julien.voisin+mat2 at dustri.org",
     description="A handy tool to trash your metadata",


=====================================
tests/test_climat2.py
=====================================
@@ -1,4 +1,3 @@
-import sys
 import random
 import os
 import shutil
@@ -267,14 +266,7 @@ class TestCommandLineParallel(unittest.TestCase):
     def test_different(self):
         src = './tests/data/'
         dst = './tests/data/parallel'
-        if sys.version_info >= (3, 8):
-            with os.scandir(src) as itr:
-                    entries = list(itr)
-            shutil._copytree(entries=entries, src=src, dst=dst, symlinks=False,
-                    ignore=None, copy_function=shutil.copy2,
-                    ignore_dangling_symlinks=False)
-        else:
-            shutil.copytree(src, dst)
+        shutil.copytree(src, dst)
 
         proc = subprocess.Popen(mat2_binary + glob.glob('./tests/data/parallel/dirty.*'),
                 stdout=subprocess.PIPE)
@@ -286,7 +278,7 @@ class TestCommandLineParallel(unittest.TestCase):
             self.assertIsNotNone(p)
             p = parser_factory.get_parser(p.output_filename)
             self.assertEqual(p.get_meta(), {})
-        shutil.rmtree('./tests/data/parallel')
+        shutil.rmtree('./tests/data/parallel/')
 
     def test_faulty(self):
         for i in range(self.iterations):


=====================================
tests/test_libmat2.py
=====================================
@@ -445,7 +445,10 @@ class TestCleaning(unittest.TestCase):
             'meta': {
                 'WorkDescription': "This is a test svg image for mat2's testsuite",
             },
-            'expected_meta': {},
+            'expected_meta': {
+                'ImageSize': '128x128',
+                'Megapixels': '0.016',
+            },
         } ,{
             'name': 'ppm',
             'parser': images.PPMParser,
@@ -506,41 +509,42 @@ class TestCleaning(unittest.TestCase):
 
     def test_all_parametred(self):
         for case in self.data:
-            if 'ffmpeg' in case:
-                try:
-                    video._get_ffmpeg_path()
-                except RuntimeError:
-                    raise unittest.SkipTest
-
-            print('[+] Testing %s' % case['name'])
-            target = './tests/data/clean.' + case['name']
-            shutil.copy('./tests/data/dirty.' + case['name'], target)
-            p1 = case['parser'](target)
-
-            for k, v in p1.get_meta().items():
-                if k not in case['meta']:
-                    continue
-                if isinstance(v, dict):
-                    for _k, _v in v.items():
-                        if _k in case['meta'][k]:
-                            self.assertEqual(_v, case['meta'][k][_k])
-                else:
-                    self.assertEqual(v, case['meta'][k])
-
-            p1.lightweight_cleaning = True
-            self.assertTrue(p1.remove_all())
-
-            p2 = case['parser'](p1.output_filename)
-            meta = p2.get_meta()
-            if meta:
-                for k, v in p2.get_meta().items():
-                    self.assertIn(k, case['expected_meta'], '"%s" is not in "%s" (%s)' % (k, case['expected_meta'], case['name']))
-                    self.assertIn(str(case['expected_meta'][k]), str(v))
-            self.assertTrue(p2.remove_all())
-
-            os.remove(target)
-            os.remove(p1.output_filename)
-            os.remove(p2.output_filename)
+            with self.subTest(case=case):
+                if 'ffmpeg' in case:
+                    try:
+                        video._get_ffmpeg_path()
+                    except RuntimeError:
+                        raise unittest.SkipTest
+
+                print('[+] Testing %s' % case['name'])
+                target = './tests/data/clean.' + case['name']
+                shutil.copy('./tests/data/dirty.' + case['name'], target)
+                p1 = case['parser'](target)
+
+                for k, v in p1.get_meta().items():
+                    if k not in case['meta']:
+                        continue
+                    if isinstance(v, dict):
+                        for _k, _v in v.items():
+                            if _k in case['meta'][k]:
+                                self.assertEqual(_v, case['meta'][k][_k])
+                    else:
+                        self.assertEqual(v, case['meta'][k])
+
+                p1.lightweight_cleaning = True
+                self.assertTrue(p1.remove_all())
+
+                p2 = case['parser'](p1.output_filename)
+                meta = p2.get_meta()
+                if meta:
+                    for k, v in p2.get_meta().items():
+                        self.assertIn(k, case['expected_meta'], '"%s" is not in "%s" (%s)' % (k, case['expected_meta'], case['name']))
+                        self.assertIn(str(case['expected_meta'][k]), str(v))
+                self.assertTrue(p2.remove_all())
+
+                os.remove(target)
+                os.remove(p1.output_filename)
+                os.remove(p2.output_filename)
 
 
     def test_html(self):



View it on GitLab: https://salsa.debian.org/pkg-privacy-team/mat2/-/commit/450ecda2c908b0dbfbb8e57ee8f0f038a8b3dfb6

-- 
View it on GitLab: https://salsa.debian.org/pkg-privacy-team/mat2/-/commit/450ecda2c908b0dbfbb8e57ee8f0f038a8b3dfb6
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/pkg-privacy-commits/attachments/20220507/a04c3d45/attachment-0001.htm>


More information about the Pkg-privacy-commits mailing list