[Pkg-privacy-commits] [Git][pkg-privacy-team/mat2][upstream] New upstream version 0.12.4
Georg Faerber (@georg)
georg at debian.org
Sat May 7 13:20:17 BST 2022
Georg Faerber pushed to branch upstream at Privacy Maintainers / mat2
Commits:
450ecda2 by Georg Faerber at 2022-05-07T12:12:25+00:00
New upstream version 0.12.4
- - - - -
11 changed files:
- .gitlab-ci.yml
- CHANGELOG.md
- README.md
- doc/mat2.1
- libmat2/images.py
- libmat2/pdf.py
- libmat2/web.py
- mat2
- setup.py
- tests/test_climat2.py
- tests/test_libmat2.py
Changes:
=====================================
.gitlab-ci.yml
=====================================
@@ -1,3 +1,6 @@
+include:
+ - template: Security/SAST.gitlab-ci.yml
+
variables:
CONTAINER_REGISTRY: $CI_REGISTRY/georg/mat2-ci-images
=====================================
CHANGELOG.md
=====================================
@@ -1,3 +1,12 @@
+# 0.12.4 - 2022-04-30
+
+- Fix possible errors/crashes when processing multiple files
+ via the command line interface
+- Use a fixed PDF version for the output
+- Improve compatibility with modern versions of rsvg
+- Improve the robustness of the command line interface with
+ regard to control characters
+
# 0.12.3 - 2022-01-06
- Implement code for internationalization
=====================================
README.md
=====================================
@@ -136,6 +136,8 @@ of the guarantee that mat2 won't modify the data of their files, there is the
watermarks from PDF.
- [Scrambled Exif](https://f-droid.org/packages/com.jarsilio.android.scrambledeggsif/),
an open-source Android application to remove metadata from pictures.
+- [Dangerzone](https://dangerzone.rocks/), designed to sanitize harmful documents
+ into harmless ones.
# Contact
=====================================
doc/mat2.1
=====================================
@@ -1,4 +1,4 @@
-.TH mat2 "1" "January 2022" "mat2 0.12.3" "User Commands"
+.TH mat2 "1" "April 2022" "mat2 0.12.4" "User Commands"
.SH NAME
mat2 \- the metadata anonymisation toolkit 2
=====================================
libmat2/images.py
=====================================
@@ -30,12 +30,23 @@ class SVGParser(exiftool.ExiftoolParser):
svg = Rsvg.Handle.new_from_file(self.filename)
except GLib.GError:
raise ValueError
- dimensions = svg.get_dimensions()
- surface = cairo.SVGSurface(self.output_filename,
- dimensions.height,
- dimensions.width)
+
+ try:
+ _, _, _, _, has_viewbox, viewbox = svg.get_intrinsic_dimensions()
+ if has_viewbox is False:
+ raise ValueError
+ _, width, height = svg.get_intrinsic_size_in_pixels()
+ except AttributeError:
+ dimensions = svg.get_dimensions()
+ height, width = dimensions.height, dimensions.width
+
+ surface = cairo.SVGSurface(self.output_filename, height, width)
context = cairo.Context(surface)
- svg.render_cairo(context)
+ try:
+ svg.render_document(context, viewbox)
+ except AttributeError:
+ svg.render_cairo(context)
+
surface.finish()
return True
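
Note on the images.py hunk above: it prefers the rendering API of newer librsvg (get_intrinsic_dimensions / get_intrinsic_size_in_pixels / render_document) and falls back to the deprecated get_dimensions / render_cairo calls when the bindings are older. A condensed sketch of that fallback, assuming the GObject-introspection bindings for Rsvg and pycairo are installed (the helper name is illustrative):

import cairo
import gi
gi.require_version('Rsvg', '2.0')
from gi.repository import Rsvg

def render_svg_copy(src: str, dst: str) -> None:
    svg = Rsvg.Handle.new_from_file(src)
    try:
        # librsvg >= 2.46/2.52: ask for the intrinsic viewBox and pixel size
        _, _, _, _, has_viewbox, viewbox = svg.get_intrinsic_dimensions()
        if has_viewbox is False:
            raise ValueError
        _, width, height = svg.get_intrinsic_size_in_pixels()
    except AttributeError:
        # older bindings: fall back to the deprecated accessors
        dimensions = svg.get_dimensions()
        height, width = dimensions.height, dimensions.width
    surface = cairo.SVGSurface(dst, height, width)
    context = cairo.Context(surface)
    try:
        svg.render_document(context, viewbox)  # new API, takes a viewport
    except AttributeError:
        svg.render_cairo(context)              # deprecated in newer librsvg
    surface.finish()
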
=====================================
libmat2/pdf.py
=====================================
@@ -22,6 +22,7 @@ if LooseVersion(poppler_version) < LooseVersion('0.46'): # pragma: no cover
raise ValueError("mat2 needs at least Poppler version 0.46 to work. \
The installed version is %s." % poppler_version) # pragma: no cover
+FIXED_PDF_VERSION = cairo.PDFVersion.VERSION_1_5
class PDFParser(abstract.AbstractParser):
mimetypes = {'application/pdf', }
@@ -52,6 +53,7 @@ class PDFParser(abstract.AbstractParser):
tmp_path = tempfile.mkstemp()[1]
pdf_surface = cairo.PDFSurface(tmp_path, 10, 10) # resized later anyway
+ pdf_surface.restrict_to_version(FIXED_PDF_VERSION)
pdf_context = cairo.Context(pdf_surface) # context draws on the surface
for pagenum in range(pages_count):
@@ -80,6 +82,7 @@ class PDFParser(abstract.AbstractParser):
_, tmp_path = tempfile.mkstemp()
pdf_surface = cairo.PDFSurface(tmp_path, 32, 32) # resized later anyway
+ pdf_surface.restrict_to_version(FIXED_PDF_VERSION)
pdf_context = cairo.Context(pdf_surface)
for pagenum in range(pages_count):
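
Note on the pdf.py hunks above: they pin the PDF version that cairo writes, so the output no longer varies with the installed cairo release. A minimal sketch of the pattern (the helper name is illustrative):

import cairo

FIXED_PDF_VERSION = cairo.PDFVersion.VERSION_1_5

def new_fixed_version_surface(path: str) -> cairo.PDFSurface:
    # The initial size is a placeholder; mat2 resizes it per page later.
    surface = cairo.PDFSurface(path, 10, 10)
    # Must be called before any drawing operation on the surface.
    surface.restrict_to_version(FIXED_PDF_VERSION)
    return surface
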
=====================================
libmat2/web.py
=====================================
@@ -104,6 +104,7 @@ class _HTMLParser(parser.HTMLParser):
self.tag_required_blocklist = required_blocklisted_tags
self.tag_blocklist = blocklisted_tags
+ # pylint: disable=R0201
def error(self, message): # pragma: no cover
""" Amusingly, Python's documentation doesn't mention that this
function needs to be implemented in subclasses of the parent class
=====================================
mat2
=====================================
@@ -17,7 +17,7 @@ except ValueError as ex:
print(ex)
sys.exit(1)
-__version__ = '0.12.3'
+__version__ = '0.12.4'
# Make pyflakes happy
assert Set
@@ -26,13 +26,19 @@ assert Union
logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.WARNING)
+def __print_without_chars(s: str):
+ """ Remove control characters
+ We might use 'Cc' instead of 'C', but better safe than sorry
+ https://www.unicode.org/reports/tr44/#GC_Values_Table
+ """
+ print(''.join(ch for ch in s if not unicodedata.category(ch).startswith('C')))
def __check_file(filename: str, mode: int = os.R_OK) -> bool:
if not os.path.exists(filename):
- print("[-] %s doesn't exist." % filename)
+ __print_without_chars("[-] %s doesn't exist." % filename)
return False
elif not os.path.isfile(filename):
- print("[-] %s is not a regular file." % filename)
+ __print_without_chars("[-] %s is not a regular file." % filename)
return False
elif not os.access(filename, mode):
mode_str = [] # type: List[str]
@@ -40,7 +46,7 @@ def __check_file(filename: str, mode: int = os.R_OK) -> bool:
mode_str += 'readable'
if mode & os.W_OK:
mode_str += 'writeable'
- print("[-] %s is not %s." % (filename, 'nor '.join(mode_str)))
+ __print_without_chars("[-] %s is not %s." % (filename, 'nor '.join(mode_str)))
return False
return True
@@ -88,10 +94,10 @@ def show_meta(filename: str, sandbox: bool):
try:
p, mtype = parser_factory.get_parser(filename) # type: ignore
except ValueError as e:
- print("[-] something went wrong when processing %s: %s" % (filename, e))
+ __print_without_chars("[-] something went wrong when processing %s: %s" % (filename, e))
return
if p is None:
- print("[-] %s's format (%s) is not supported" % (filename, mtype))
+ __print_without_chars("[-] %s's format (%s) is not supported" % (filename, mtype))
return
p.sandbox = sandbox
__print_meta(filename, p.get_meta())
@@ -100,28 +106,22 @@ def show_meta(filename: str, sandbox: bool):
def __print_meta(filename: str, metadata: dict, depth: int = 1):
padding = " " * depth*2
if not metadata:
- print(padding + "No metadata found in %s." % filename)
+ __print_without_chars(padding + "No metadata found in %s." % filename)
return
- print("[%s] Metadata for %s:" % ('+'*depth, filename))
+ __print_without_chars("[%s] Metadata for %s:" % ('+'*depth, filename))
for (k, v) in sorted(metadata.items()):
if isinstance(v, dict):
__print_meta(k, v, depth+1)
continue
- # Remove control characters
- # We might use 'Cc' instead of 'C', but better safe than sorry
- # https://www.unicode.org/reports/tr44/#GC_Values_Table
- try:
- v = ''.join(ch for ch in v if not unicodedata.category(ch).startswith('C'))
- except TypeError:
- pass # for things that aren't iterable
-
try: # FIXME this is ugly.
- print(padding + " %s: %s" % (k, v))
+ __print_without_chars(padding + " %s: %s" % (k, v))
except UnicodeEncodeError:
- print(padding + " %s: harmful content" % k)
+ __print_without_chars(padding + " %s: harmful content" % k)
+ except TypeError:
+ pass # for things that aren't iterable
def clean_meta(filename: str, is_lightweight: bool, inplace: bool, sandbox: bool,
@@ -133,10 +133,10 @@ def clean_meta(filename: str, is_lightweight: bool, inplace: bool, sandbox: bool
try:
p, mtype = parser_factory.get_parser(filename) # type: ignore
except ValueError as e:
- print("[-] something went wrong when cleaning %s: %s" % (filename, e))
+ __print_without_chars("[-] something went wrong when cleaning %s: %s" % (filename, e))
return False
if p is None:
- print("[-] %s's format (%s) is not supported" % (filename, mtype))
+ __print_without_chars("[-] %s's format (%s) is not supported" % (filename, mtype))
return False
p.unknown_member_policy = policy
p.lightweight_cleaning = is_lightweight
@@ -151,7 +151,7 @@ def clean_meta(filename: str, is_lightweight: bool, inplace: bool, sandbox: bool
os.rename(p.output_filename, filename)
return ret
except RuntimeError as e:
- print("[-] %s can't be cleaned: %s" % (filename, e))
+ __print_without_chars("[-] %s can't be cleaned: %s" % (filename, e))
return False
@@ -169,7 +169,7 @@ def show_parsers():
# mimetype, so there is no point in showing the mimetype at all
continue
formats.add(' - %s (%s)' % (mtype, ', '.join(extensions)))
- print('\n'.join(sorted(formats)))
+ __print_without_chars('\n'.join(sorted(formats)))
def __get_files_recursively(files: List[str]) -> List[str]:
@@ -198,9 +198,9 @@ def main() -> int:
show_parsers()
return 0
elif args.check_dependencies:
- print("Dependencies for mat2 %s:" % __version__)
+ __print_without_chars("Dependencies for mat2 %s:" % __version__)
for key, value in sorted(check_dependencies().items()):
- print('- %s: %s %s' % (key, 'yes' if value['found'] else 'no',
+ __print_without_chars('- %s: %s %s' % (key, 'yes' if value['found'] else 'no',
'(optional)' if not value['required'] else ''))
else:
arg_parser.print_help()
@@ -221,14 +221,14 @@ def main() -> int:
files = __get_files_recursively(args.files)
# We have to use Processes instead of Threads, since
# we're using tempfile.mkdtemp, which isn't thread-safe.
+ futures = list()
with concurrent.futures.ProcessPoolExecutor() as executor:
- futures = list()
for f in files:
future = executor.submit(clean_meta, f, args.lightweight,
inplace, args.sandbox, policy)
futures.append(future)
- for future in concurrent.futures.as_completed(futures):
- no_failure &= future.result()
+ for future in concurrent.futures.as_completed(futures):
+ no_failure &= future.result()
return 0 if no_failure is True else -1
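
Note on the mat2 script hunks above: every user-facing print now goes through __print_without_chars(), which strips characters in the Unicode 'C' (control and other non-printable) categories, and the results of the parallel cleaning are only collected once the ProcessPoolExecutor block has exited, i.e. after all worker processes have finished. A condensed sketch of both patterns (clean_one stands in for the real clean_meta call):

import concurrent.futures
import unicodedata

def print_without_chars(s: str) -> None:
    # Drop anything whose Unicode general category starts with 'C'
    # (control, format, surrogate, private use, unassigned).
    print(''.join(ch for ch in s if not unicodedata.category(ch).startswith('C')))

def clean_all(files, clean_one) -> bool:
    futures = []
    with concurrent.futures.ProcessPoolExecutor() as executor:
        for f in files:
            futures.append(executor.submit(clean_one, f))
    # Leaving the `with` block shuts the executor down, so every future is done.
    no_failure = True
    for future in concurrent.futures.as_completed(futures):
        no_failure &= future.result()
    return no_failure
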
=====================================
setup.py
=====================================
@@ -5,7 +5,7 @@ with open("README.md", encoding='utf-8') as fh:
setuptools.setup(
name="mat2",
- version='0.12.3',
+ version='0.12.4',
author="Julien (jvoisin) Voisin",
author_email="julien.voisin+mat2 at dustri.org",
description="A handy tool to trash your metadata",
=====================================
tests/test_climat2.py
=====================================
@@ -1,4 +1,3 @@
-import sys
import random
import os
import shutil
@@ -267,14 +266,7 @@ class TestCommandLineParallel(unittest.TestCase):
def test_different(self):
src = './tests/data/'
dst = './tests/data/parallel'
- if sys.version_info >= (3, 8):
- with os.scandir(src) as itr:
- entries = list(itr)
- shutil._copytree(entries=entries, src=src, dst=dst, symlinks=False,
- ignore=None, copy_function=shutil.copy2,
- ignore_dangling_symlinks=False)
- else:
- shutil.copytree(src, dst)
+ shutil.copytree(src, dst)
proc = subprocess.Popen(mat2_binary + glob.glob('./tests/data/parallel/dirty.*'),
stdout=subprocess.PIPE)
@@ -286,7 +278,7 @@ class TestCommandLineParallel(unittest.TestCase):
self.assertIsNotNone(p)
p = parser_factory.get_parser(p.output_filename)
self.assertEqual(p.get_meta(), {})
- shutil.rmtree('./tests/data/parallel')
+ shutil.rmtree('./tests/data/parallel/')
def test_faulty(self):
for i in range(self.iterations):
=====================================
tests/test_libmat2.py
=====================================
@@ -445,7 +445,10 @@ class TestCleaning(unittest.TestCase):
'meta': {
'WorkDescription': "This is a test svg image for mat2's testsuite",
},
- 'expected_meta': {},
+ 'expected_meta': {
+ 'ImageSize': '128x128',
+ 'Megapixels': '0.016',
+ },
} ,{
'name': 'ppm',
'parser': images.PPMParser,
@@ -506,41 +509,42 @@ class TestCleaning(unittest.TestCase):
def test_all_parametred(self):
for case in self.data:
- if 'ffmpeg' in case:
- try:
- video._get_ffmpeg_path()
- except RuntimeError:
- raise unittest.SkipTest
-
- print('[+] Testing %s' % case['name'])
- target = './tests/data/clean.' + case['name']
- shutil.copy('./tests/data/dirty.' + case['name'], target)
- p1 = case['parser'](target)
-
- for k, v in p1.get_meta().items():
- if k not in case['meta']:
- continue
- if isinstance(v, dict):
- for _k, _v in v.items():
- if _k in case['meta'][k]:
- self.assertEqual(_v, case['meta'][k][_k])
- else:
- self.assertEqual(v, case['meta'][k])
-
- p1.lightweight_cleaning = True
- self.assertTrue(p1.remove_all())
-
- p2 = case['parser'](p1.output_filename)
- meta = p2.get_meta()
- if meta:
- for k, v in p2.get_meta().items():
- self.assertIn(k, case['expected_meta'], '"%s" is not in "%s" (%s)' % (k, case['expected_meta'], case['name']))
- self.assertIn(str(case['expected_meta'][k]), str(v))
- self.assertTrue(p2.remove_all())
-
- os.remove(target)
- os.remove(p1.output_filename)
- os.remove(p2.output_filename)
+ with self.subTest(case=case):
+ if 'ffmpeg' in case:
+ try:
+ video._get_ffmpeg_path()
+ except RuntimeError:
+ raise unittest.SkipTest
+
+ print('[+] Testing %s' % case['name'])
+ target = './tests/data/clean.' + case['name']
+ shutil.copy('./tests/data/dirty.' + case['name'], target)
+ p1 = case['parser'](target)
+
+ for k, v in p1.get_meta().items():
+ if k not in case['meta']:
+ continue
+ if isinstance(v, dict):
+ for _k, _v in v.items():
+ if _k in case['meta'][k]:
+ self.assertEqual(_v, case['meta'][k][_k])
+ else:
+ self.assertEqual(v, case['meta'][k])
+
+ p1.lightweight_cleaning = True
+ self.assertTrue(p1.remove_all())
+
+ p2 = case['parser'](p1.output_filename)
+ meta = p2.get_meta()
+ if meta:
+ for k, v in p2.get_meta().items():
+ self.assertIn(k, case['expected_meta'], '"%s" is not in "%s" (%s)' % (k, case['expected_meta'], case['name']))
+ self.assertIn(str(case['expected_meta'][k]), str(v))
+ self.assertTrue(p2.remove_all())
+
+ os.remove(target)
+ os.remove(p1.output_filename)
+ os.remove(p2.output_filename)
def test_html(self):
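
Note on the test_libmat2.py hunk above: the parametrised loop now wraps each case in unittest's subTest(), so a failing format is reported on its own instead of aborting the whole loop. A minimal, self-contained illustration of that pattern (the data here is made up):

import unittest

class TestParametrised(unittest.TestCase):
    cases = [('svg', 3), ('webp', 4)]  # hypothetical cases

    def test_all(self):
        for name, expected_length in self.cases:
            with self.subTest(case=name):
                # Each iteration is reported separately on failure.
                self.assertEqual(len(name), expected_length)

if __name__ == '__main__':
    unittest.main()
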
View it on GitLab: https://salsa.debian.org/pkg-privacy-team/mat2/-/commit/450ecda2c908b0dbfbb8e57ee8f0f038a8b3dfb6