[Pkg-privacy-commits] [Git][pkg-privacy-team/mat2][upstream] New upstream version 0.13.2
Georg Faerber (@georg)
georg at debian.org
Sat Jan 28 20:16:49 GMT 2023
Georg Faerber pushed to branch upstream at Privacy Maintainers / mat2
Commits:
8e6a49ae by Georg Faerber at 2023-01-28T20:11:25+00:00
New upstream version 0.13.2
- - - - -
19 changed files:
- CHANGELOG.md
- doc/mat2.1
- libmat2/__init__.py
- libmat2/abstract.py
- libmat2/archive.py
- libmat2/audio.py
- libmat2/bubblewrap.py
- libmat2/epub.py
- libmat2/exiftool.py
- libmat2/harmless.py
- libmat2/images.py
- libmat2/office.py
- libmat2/parser_factory.py
- libmat2/pdf.py
- libmat2/torrent.py
- libmat2/video.py
- libmat2/web.py
- mat2
- setup.py
Changes:
=====================================
CHANGELOG.md
=====================================
@@ -1,3 +1,7 @@
+# 0.13.2 - 2023-01-28
+
+- Fix a crash on some python versions
+
# 0.13.1 - 2023-01-07
- Improve xlsx support
=====================================
doc/mat2.1
=====================================
@@ -1,4 +1,4 @@
-.TH mat2 "1" "January 2023" "mat2 0.13.1" "User Commands"
+.TH mat2 "1" "January 2023" "mat2 0.13.2" "User Commands"
.SH NAME
mat2 \- the metadata anonymisation toolkit 2
=====================================
libmat2/__init__.py
=====================================
@@ -2,7 +2,7 @@
import enum
import importlib
-from typing import Optional, Union
+from typing import Optional, Union, Dict
from . import exiftool, video
@@ -66,8 +66,9 @@ CMD_DEPENDENCIES = {
},
}
-def check_dependencies() -> dict[str, dict[str, bool]]:
- ret = dict() # type: dict[str, dict]
+
+def check_dependencies() -> Dict[str, Dict[str, bool]]:
+ ret = dict() # type: Dict[str, Dict]
for key, value in DEPENDENCIES.items():
ret[key] = {
=====================================
libmat2/abstract.py
=====================================
@@ -1,7 +1,7 @@
import abc
import os
import re
-from typing import Union
+from typing import Union, Set, Dict
class AbstractParser(abc.ABC):
@@ -9,8 +9,8 @@ class AbstractParser(abc.ABC):
It might yield `ValueError` on instantiation on invalid files,
and `RuntimeError` when something went wrong in `remove_all`.
"""
- meta_list = set() # type: set[str]
- mimetypes = set() # type: set[str]
+ meta_list = set() # type: Set[str]
+ mimetypes = set() # type: Set[str]
def __init__(self, filename: str) -> None:
"""
@@ -33,7 +33,7 @@ class AbstractParser(abc.ABC):
self.sandbox = True
@abc.abstractmethod
- def get_meta(self) -> dict[str, Union[str, dict]]:
+ def get_meta(self) -> Dict[str, Union[str, Dict]]:
"""Return all the metadata of the current file"""
@abc.abstractmethod
=====================================
libmat2/archive.py
=====================================
@@ -7,7 +7,7 @@ import tempfile
import os
import logging
import shutil
-from typing import Pattern, Union, Any
+from typing import Pattern, Union, Any, Set, Dict, List
from . import abstract, UnknownMemberPolicy, parser_factory
@@ -44,16 +44,16 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
def __init__(self, filename):
super().__init__(filename)
# We ignore typing here because mypy is too stupid
- self.archive_class = None # type: ignore
- self.member_class = None # type: ignore
+ self.archive_class = None # type: ignore
+ self.member_class = None # type: ignore
# Those are the files that have a format that _isn't_
# supported by mat2, but that we want to keep anyway.
- self.files_to_keep = set() # type: set[Pattern]
+ self.files_to_keep = set() # type: Set[Pattern]
# Those are the files that we _do not_ want to keep,
# no matter if they are supported or not.
- self.files_to_omit = set() # type: set[Pattern]
+ self.files_to_omit = set() # type: Set[Pattern]
# what should the parser do if it encounters an unknown file in
# the archive?
@@ -72,7 +72,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
# pylint: disable=unused-argument
return True # pragma: no cover
- def _specific_get_meta(self, full_path: str, file_path: str) -> dict[str, Any]:
+ def _specific_get_meta(self, full_path: str, file_path: str) -> Dict[str, Any]:
""" This method can be used to extract specific metadata
from files present in the archive."""
# pylint: disable=unused-argument
@@ -87,7 +87,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
@staticmethod
@abc.abstractmethod
- def _get_all_members(archive: ArchiveClass) -> list[ArchiveMember]:
+ def _get_all_members(archive: ArchiveClass) -> List[ArchiveMember]:
"""Return all the members of the archive."""
@staticmethod
@@ -97,7 +97,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
@staticmethod
@abc.abstractmethod
- def _get_member_meta(member: ArchiveMember) -> dict[str, str]:
+ def _get_member_meta(member: ArchiveMember) -> Dict[str, str]:
"""Return all the metadata of a given member."""
@staticmethod
@@ -128,8 +128,8 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
# pylint: disable=unused-argument
return member
- def get_meta(self) -> dict[str, Union[str, dict]]:
- meta = dict() # type: dict[str, Union[str, dict]]
+ def get_meta(self) -> Dict[str, Union[str, Dict]]:
+ meta = dict() # type: Dict[str, Union[str, Dict]]
with self.archive_class(self.filename) as zin:
temp_folder = tempfile.mkdtemp()
@@ -170,7 +170,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
# Sort the items to process, to reduce fingerprinting,
# and keep them in the `items` variable.
- items = list() # type: list[ArchiveMember]
+ items = list() # type: List[ArchiveMember]
for item in sorted(self._get_all_members(zin), key=self._get_member_name):
# Some fileformats do require to have the `mimetype` file
# as the first file in the archive.
@@ -264,6 +264,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
class TarParser(ArchiveBasedAbstractParser):
mimetypes = {'application/x-tar'}
+
def __init__(self, filename):
super().__init__(filename)
# yes, it's tarfile.open and not tarfile.TarFile,
@@ -336,7 +337,7 @@ class TarParser(ArchiveBasedAbstractParser):
return member
@staticmethod
- def _get_member_meta(member: ArchiveMember) -> dict[str, str]:
+ def _get_member_meta(member: ArchiveMember) -> Dict[str, str]:
assert isinstance(member, tarfile.TarInfo) # please mypy
metadata = {}
if member.mtime != 0:
@@ -358,7 +359,7 @@ class TarParser(ArchiveBasedAbstractParser):
archive.add(full_path, member.name, filter=TarParser._clean_member) # type: ignore
@staticmethod
- def _get_all_members(archive: ArchiveClass) -> list[ArchiveMember]:
+ def _get_all_members(archive: ArchiveClass) -> List[ArchiveMember]:
assert isinstance(archive, tarfile.TarFile) # please mypy
return archive.getmembers() # type: ignore
@@ -391,7 +392,8 @@ class TarXzParser(TarParser):
class ZipParser(ArchiveBasedAbstractParser):
mimetypes = {'application/zip'}
- def __init__(self, filename):
+
+ def __init__(self, filename: str):
super().__init__(filename)
self.archive_class = zipfile.ZipFile
self.member_class = zipfile.ZipInfo
@@ -412,7 +414,7 @@ class ZipParser(ArchiveBasedAbstractParser):
return member
@staticmethod
- def _get_member_meta(member: ArchiveMember) -> dict[str, str]:
+ def _get_member_meta(member: ArchiveMember) -> Dict[str, str]:
assert isinstance(member, zipfile.ZipInfo) # please mypy
metadata = {}
if member.create_system == 3: # this is Linux
@@ -439,7 +441,7 @@ class ZipParser(ArchiveBasedAbstractParser):
compress_type=member.compress_type)
@staticmethod
- def _get_all_members(archive: ArchiveClass) -> list[ArchiveMember]:
+ def _get_all_members(archive: ArchiveClass) -> List[ArchiveMember]:
assert isinstance(archive, zipfile.ZipFile) # please mypy
return archive.infolist() # type: ignore
=====================================
libmat2/audio.py
=====================================
@@ -2,7 +2,7 @@ import mimetypes
import os
import shutil
import tempfile
-from typing import Union
+from typing import Union, Dict
import mutagen
@@ -18,10 +18,10 @@ class MutagenParser(abstract.AbstractParser):
except mutagen.MutagenError:
raise ValueError
- def get_meta(self) -> dict[str, Union[str, dict]]:
+ def get_meta(self) -> Dict[str, Union[str, Dict]]:
f = mutagen.File(self.filename)
if f.tags:
- return {k:', '.join(map(str, v)) for k, v in f.tags.items()}
+ return {k: ', '.join(map(str, v)) for k, v in f.tags.items()}
return {}
def remove_all(self) -> bool:
@@ -38,8 +38,8 @@ class MutagenParser(abstract.AbstractParser):
class MP3Parser(MutagenParser):
mimetypes = {'audio/mpeg', }
- def get_meta(self) -> dict[str, Union[str, dict]]:
- metadata = {} # type: dict[str, Union[str, dict]]
+ def get_meta(self) -> Dict[str, Union[str, Dict]]:
+ metadata = {} # type: Dict[str, Union[str, Dict]]
meta = mutagen.File(self.filename).tags
if not meta:
return metadata
@@ -68,12 +68,12 @@ class FLACParser(MutagenParser):
f.save(deleteid3=True)
return True
- def get_meta(self) -> dict[str, Union[str, dict]]:
+ def get_meta(self) -> Dict[str, Union[str, Dict]]:
meta = super().get_meta()
for num, picture in enumerate(mutagen.File(self.filename).pictures):
name = picture.desc if picture.desc else 'Cover %d' % num
extension = mimetypes.guess_extension(picture.mime)
- if extension is None: # pragma: no cover
+ if extension is None: # pragma: no cover
meta[name] = 'harmful data'
continue
@@ -98,6 +98,7 @@ class WAVParser(video.AbstractFFmpegParser):
'MIMEType', 'NumChannels', 'SampleRate', 'SourceFile',
}
+
class AIFFParser(video.AbstractFFmpegParser):
mimetypes = {'audio/aiff', 'audio/x-aiff'}
meta_allowlist = {'AvgBytesPerSec', 'BitsPerSample', 'Directory',
=====================================
libmat2/bubblewrap.py
=====================================
@@ -12,7 +12,7 @@ import shutil
import subprocess
import tempfile
import functools
-from typing import Optional
+from typing import Optional, List
__all__ = ['PIPE', 'run', 'CalledProcessError']
@@ -33,7 +33,7 @@ def _get_bwrap_path() -> str:
def _get_bwrap_args(tempdir: str,
input_filename: str,
- output_filename: Optional[str] = None) -> list[str]:
+ output_filename: Optional[str] = None) -> List[str]:
ro_bind_args = []
cwd = os.getcwd()
@@ -78,7 +78,7 @@ def _get_bwrap_args(tempdir: str,
return args
-def run(args: list[str],
+def run(args: List[str],
input_filename: str,
output_filename: Optional[str] = None,
**kwargs) -> subprocess.CompletedProcess:
=====================================
libmat2/epub.py
=====================================
@@ -3,10 +3,11 @@ import re
import uuid
import zipfile
import xml.etree.ElementTree as ET # type: ignore
-from typing import Any
+from typing import Any, Dict
from . import archive, office
+
class EPUBParser(archive.ZipParser):
mimetypes = {'application/epub+zip', }
metadata_namespace = '{http://purl.org/dc/elements/1.1/}'
@@ -28,7 +29,6 @@ class EPUBParser(archive.ZipParser):
}))
self.uniqid = uuid.uuid4()
-
def is_archive_valid(self):
super().is_archive_valid()
with zipfile.ZipFile(self.filename) as zin:
@@ -37,7 +37,7 @@ class EPUBParser(archive.ZipParser):
if member_name.endswith('META-INF/encryption.xml'):
raise ValueError('the file contains encrypted fonts')
- def _specific_get_meta(self, full_path, file_path) -> dict[str, Any]:
+ def _specific_get_meta(self, full_path, file_path) -> Dict[str, Any]:
if not file_path.endswith('.opf'):
return {}
@@ -73,7 +73,6 @@ class EPUBParser(archive.ZipParser):
short_empty_elements=False)
return True
-
def __handle_tocncx(self, full_path: str) -> bool:
try:
tree, namespace = office._parse_xml(full_path)
=====================================
libmat2/exiftool.py
=====================================
@@ -4,7 +4,7 @@ import logging
import os
import shutil
import subprocess
-from typing import Union
+from typing import Union, Set, Dict
from . import abstract
from . import bubblewrap
@@ -15,9 +15,9 @@ class ExiftoolParser(abstract.AbstractParser):
from a file, hence why several parsers are re-using its `get_meta`
method.
"""
- meta_allowlist = set() # type: set[str]
+ meta_allowlist = set() # type: Set[str]
- def get_meta(self) -> dict[str, Union[str, dict]]:
+ def get_meta(self) -> Dict[str, Union[str, Dict]]:
try:
if self.sandbox:
out = bubblewrap.run([_get_exiftool_path(), '-json',
=====================================
libmat2/harmless.py
=====================================
@@ -1,5 +1,5 @@
import shutil
-from typing import Union
+from typing import Union, Dict
from . import abstract
@@ -7,7 +7,7 @@ class HarmlessParser(abstract.AbstractParser):
""" This is the parser for filetypes that can not contain metadata. """
mimetypes = {'text/plain', 'image/x-ms-bmp'}
- def get_meta(self) -> dict[str, Union[str, dict]]:
+ def get_meta(self) -> Dict[str, Union[str, Dict]]:
return dict()
def remove_all(self) -> bool:
=====================================
libmat2/images.py
=====================================
@@ -1,7 +1,6 @@
-import imghdr
import os
import re
-from typing import Union, Any
+from typing import Union, Any, Dict
import cairo
@@ -49,7 +48,7 @@ class SVGParser(exiftool.ExiftoolParser):
surface.finish()
return True
- def get_meta(self) -> dict[str, Union[str, dict]]:
+ def get_meta(self) -> Dict[str, Union[str, Dict]]:
meta = super().get_meta()
# The namespace is mandatory, but only the …/2000/svg is valid.
@@ -58,6 +57,7 @@ class SVGParser(exiftool.ExiftoolParser):
meta.pop('Xmlns')
return meta
+
class PNGParser(exiftool.ExiftoolParser):
mimetypes = {'image/png', }
meta_allowlist = {'SourceFile', 'ExifToolVersion', 'FileName',
@@ -71,9 +71,6 @@ class PNGParser(exiftool.ExiftoolParser):
def __init__(self, filename):
super().__init__(filename)
- if imghdr.what(filename) != 'png':
- raise ValueError
-
try: # better fail here than later
cairo.ImageSurface.create_from_png(self.filename)
except: # pragma: no cover
@@ -111,7 +108,6 @@ class GdkPixbufAbstractParser(exiftool.ExiftoolParser):
def __init__(self, filename):
super().__init__(filename)
- # we can't use imghdr here because of https://bugs.python.org/issue28591
try:
GdkPixbuf.Pixbuf.new_from_file(self.filename)
except GLib.GError:
@@ -161,11 +157,12 @@ class TiffParser(GdkPixbufAbstractParser):
'FileTypeExtension', 'ImageHeight', 'ImageSize',
'ImageWidth', 'MIMEType', 'Megapixels', 'SourceFile'}
+
class PPMParser(abstract.AbstractParser):
mimetypes = {'image/x-portable-pixmap'}
- def get_meta(self) -> dict[str, Union[str, dict]]:
- meta = {} # type: dict[str, Union[str, dict[Any, Any]]]
+ def get_meta(self) -> Dict[str, Union[str, Dict]]:
+ meta = {} # type: Dict[str, Union[str, Dict[Any, Any]]]
with open(self.filename) as f:
for idx, line in enumerate(f):
if line.lstrip().startswith('#'):
@@ -181,9 +178,10 @@ class PPMParser(abstract.AbstractParser):
fout.write(line)
return True
+
class HEICParser(exiftool.ExiftoolParser):
mimetypes = {'image/heic'}
- meta_allowlist = {'SourceFile', 'ExifToolVersion', 'FileName','Directory',
+ meta_allowlist = {'SourceFile', 'ExifToolVersion', 'FileName', 'Directory',
'FileSize', 'FileModifyDate', 'FileAccessDate',
'FileInodeChangeDate', 'FilePermissions', 'FileType',
'FileTypeExtension', 'MIMEType', 'MajorBrand', 'MinorVersion',
=====================================
libmat2/office.py
=====================================
@@ -4,7 +4,7 @@ import logging
import os
import re
import zipfile
-from typing import Pattern, Any
+from typing import Pattern, Any, Tuple, Dict
import xml.etree.ElementTree as ET # type: ignore
@@ -12,7 +12,8 @@ from .archive import ZipParser
# pylint: disable=line-too-long
-def _parse_xml(full_path: str) -> tuple[ET.ElementTree, dict[str, str]]:
+
+def _parse_xml(full_path: str) -> Tuple[ET.ElementTree, Dict[str, str]]:
""" This function parses XML, with namespace support. """
namespace_map = dict()
for _, (key, value) in ET.iterparse(full_path, ("start-ns", )):
@@ -68,7 +69,6 @@ class MSOfficeParser(ZipParser):
'application/vnd.openxmlformats-officedocument.wordprocessingml.settings+xml',
}
-
def __init__(self, filename):
super().__init__(filename)
@@ -148,7 +148,7 @@ class MSOfficeParser(ZipParser):
return False
xml_data = zin.read('[Content_Types].xml')
- self.content_types = dict() # type: dict[str, str]
+ self.content_types = dict() # type: Dict[str, str]
try:
tree = ET.fromstring(xml_data)
except ET.ParseError:
@@ -218,7 +218,7 @@ class MSOfficeParser(ZipParser):
if 'w' not in namespace:
return True
- parent_map = {c:p for p in tree.iter() for c in p}
+ parent_map = {c: p for p in tree.iter() for c in p}
elements_to_remove = list()
for element in tree.iterfind('.//w:nsid', namespace):
@@ -229,7 +229,6 @@ class MSOfficeParser(ZipParser):
tree.write(full_path, xml_declaration=True)
return True
-
@staticmethod
def __remove_revisions(full_path: str) -> bool:
try:
@@ -319,7 +318,6 @@ class MSOfficeParser(ZipParser):
for i in re.findall(r'<p:cNvPr id="([0-9]+)"', content):
self.__counters['cNvPr'].add(int(i))
-
@staticmethod
def __randomize_creationId(full_path: str) -> bool:
try:
@@ -431,7 +429,7 @@ class MSOfficeParser(ZipParser):
return True
- def _specific_get_meta(self, full_path: str, file_path: str) -> dict[str, Any]:
+ def _specific_get_meta(self, full_path: str, file_path: str) -> Dict[str, Any]:
"""
Yes, I know that parsing xml with regexp ain't pretty,
be my guest and fix it if you want.
@@ -441,8 +439,8 @@ class MSOfficeParser(ZipParser):
with open(full_path, encoding='utf-8') as f:
try:
- results = re.findall(r"<(.+)>(.+)</\1>", f.read(), re.I|re.M)
- return {k:v for (k, v) in results}
+ results = re.findall(r"<(.+)>(.+)</\1>", f.read(), re.I | re.M)
+ return {k: v for (k, v) in results}
except (TypeError, UnicodeDecodeError):
# We didn't manage to parse the xml file
return {file_path: 'harmful content', }
@@ -459,7 +457,6 @@ class LibreOfficeParser(ZipParser):
'application/vnd.oasis.opendocument.image',
}
-
def __init__(self, filename):
super().__init__(filename)
@@ -512,7 +509,7 @@ class LibreOfficeParser(ZipParser):
return False
return True
- def _specific_get_meta(self, full_path: str, file_path: str) -> dict[str, Any]:
+ def _specific_get_meta(self, full_path: str, file_path: str) -> Dict[str, Any]:
"""
Yes, I know that parsing xml with regexp ain't pretty,
be my guest and fix it if you want.
=====================================
libmat2/parser_factory.py
=====================================
@@ -2,7 +2,7 @@ import glob
import os
import mimetypes
import importlib
-from typing import TypeVar, Optional
+from typing import TypeVar, Optional, List
from . import abstract, UNSUPPORTED_EXTENSIONS
@@ -34,7 +34,7 @@ def __load_all_parsers():
__load_all_parsers()
-def _get_parsers() -> list[T]:
+def _get_parsers() -> List[T]:
""" Get all our parsers!"""
def __get_parsers(cls):
return cls.__subclasses__() + \
=====================================
libmat2/pdf.py
=====================================
@@ -7,7 +7,7 @@ import re
import logging
import tempfile
import io
-from typing import Union
+from typing import Union, Dict
import cairo
import gi
@@ -18,6 +18,7 @@ from . import abstract
FIXED_PDF_VERSION = cairo.PDFVersion.VERSION_1_5
+
class PDFParser(abstract.AbstractParser):
mimetypes = {'application/pdf', }
meta_list = {'author', 'creation-date', 'creator', 'format', 'keywords',
@@ -140,13 +141,13 @@ class PDFParser(abstract.AbstractParser):
return True
@staticmethod
- def __parse_metadata_field(data: str) -> dict[str, str]:
+ def __parse_metadata_field(data: str) -> Dict[str, str]:
metadata = {}
for (_, key, value) in re.findall(r"<(xmp|pdfx|pdf|xmpMM):(.+)>(.+)</\1:\2>", data, re.I):
metadata[key] = value
return metadata
- def get_meta(self) -> dict[str, Union[str, dict]]:
+ def get_meta(self) -> Dict[str, Union[str, Dict]]:
""" Return a dict with all the meta of the file
"""
metadata = {}
=====================================
libmat2/torrent.py
=====================================
@@ -1,5 +1,5 @@
import logging
-from typing import Union
+from typing import Union, Dict, List, Tuple
from . import abstract
@@ -15,7 +15,7 @@ class TorrentParser(abstract.AbstractParser):
if self.dict_repr is None:
raise ValueError
- def get_meta(self) -> dict[str, Union[str, dict]]:
+ def get_meta(self) -> Dict[str, Union[str, Dict]]:
metadata = {}
for key, value in self.dict_repr.items():
if key not in self.allowlist:
@@ -56,7 +56,7 @@ class _BencodeHandler:
}
@staticmethod
- def __decode_int(s: bytes) -> tuple[int, bytes]:
+ def __decode_int(s: bytes) -> Tuple[int, bytes]:
s = s[1:]
next_idx = s.index(b'e')
if s.startswith(b'-0'):
@@ -66,7 +66,7 @@ class _BencodeHandler:
return int(s[:next_idx]), s[next_idx+1:]
@staticmethod
- def __decode_string(s: bytes) -> tuple[bytes, bytes]:
+ def __decode_string(s: bytes) -> Tuple[bytes, bytes]:
colon = s.index(b':')
# FIXME Python3 is broken here, the call to `ord` shouldn't be needed,
# but apparently it is. This is utterly idiotic.
@@ -76,7 +76,7 @@ class _BencodeHandler:
s = s[1:]
return s[colon:colon+str_len], s[colon+str_len:]
- def __decode_list(self, s: bytes) -> tuple[list, bytes]:
+ def __decode_list(self, s: bytes) -> Tuple[List, bytes]:
ret = list()
s = s[1:] # skip leading `l`
while s[0] != ord('e'):
@@ -84,7 +84,7 @@ class _BencodeHandler:
ret.append(value)
return ret, s[1:]
- def __decode_dict(self, s: bytes) -> tuple[dict, bytes]:
+ def __decode_dict(self, s: bytes) -> Tuple[Dict, bytes]:
ret = dict()
s = s[1:] # skip leading `d`
while s[0] != ord(b'e'):
@@ -113,10 +113,10 @@ class _BencodeHandler:
ret += self.__encode_func[type(value)](value)
return b'd' + ret + b'e'
- def bencode(self, s: Union[dict, list, bytes, int]) -> bytes:
+ def bencode(self, s: Union[Dict, List, bytes, int]) -> bytes:
return self.__encode_func[type(s)](s)
- def bdecode(self, s: bytes) -> Union[dict, None]:
+ def bdecode(self, s: bytes) -> Union[Dict, None]:
try:
ret, trail = self.__decode_func[s[0]](s)
except (IndexError, KeyError, ValueError) as e:
=====================================
libmat2/video.py
=====================================
@@ -3,7 +3,7 @@ import functools
import shutil
import logging
-from typing import Union
+from typing import Union, Dict
from . import exiftool
from . import bubblewrap
@@ -12,7 +12,7 @@ from . import bubblewrap
class AbstractFFmpegParser(exiftool.ExiftoolParser):
""" Abstract parser for all FFmpeg-based ones, mainly for video. """
# Some fileformats have mandatory metadata fields
- meta_key_value_allowlist = {} # type: dict[str, Union[str, int]]
+ meta_key_value_allowlist = {} # type: Dict[str, Union[str, int]]
def remove_all(self) -> bool:
if self.meta_key_value_allowlist:
@@ -45,10 +45,10 @@ class AbstractFFmpegParser(exiftool.ExiftoolParser):
return False
return True
- def get_meta(self) -> dict[str, Union[str, dict]]:
+ def get_meta(self) -> Dict[str, Union[str, Dict]]:
meta = super().get_meta()
- ret = dict() # type: dict[str, Union[str, dict]]
+ ret = dict() # type: Dict[str, Union[str, Dict]]
for key, value in meta.items():
if key in self.meta_key_value_allowlist:
if value == self.meta_key_value_allowlist[key]:
=====================================
libmat2/web.py
=====================================
@@ -1,5 +1,5 @@
from html import parser, escape
-from typing import Any, Optional
+from typing import Any, Optional, Dict, List, Tuple, Set
import re
import string
@@ -25,7 +25,7 @@ class CSSParser(abstract.AbstractParser):
f.write(cleaned)
return True
- def get_meta(self) -> dict[str, Any]:
+ def get_meta(self) -> Dict[str, Any]:
metadata = {}
with open(self.filename, encoding='utf-8') as f:
try:
@@ -44,10 +44,10 @@ class CSSParser(abstract.AbstractParser):
class AbstractHTMLParser(abstract.AbstractParser):
- tags_blocklist = set() # type: set[str]
+ tags_blocklist = set() # type: Set[str]
# In some html/xml-based formats some tags are mandatory,
# so we're keeping them, but are discarding their content
- tags_required_blocklist = set() # type: set[str]
+ tags_required_blocklist = set() # type: Set[str]
def __init__(self, filename):
super().__init__(filename)
@@ -57,7 +57,7 @@ class AbstractHTMLParser(abstract.AbstractParser):
self.__parser.feed(f.read())
self.__parser.close()
- def get_meta(self) -> dict[str, Any]:
+ def get_meta(self) -> Dict[str, Any]:
return self.__parser.get_meta()
def remove_all(self) -> bool:
@@ -112,7 +112,7 @@ class _HTMLParser(parser.HTMLParser):
"""
raise ValueError(message)
- def handle_starttag(self, tag: str, attrs: list[tuple[str, Optional[str]]]):
+ def handle_starttag(self, tag: str, attrs: List[Tuple[str, Optional[str]]]):
# Ignore the type, because mypy is too stupid to infer
# that get_starttag_text() can't return None.
original_tag = self.get_starttag_text() # type: ignore
@@ -159,7 +159,7 @@ class _HTMLParser(parser.HTMLParser):
self.__textrepr += escape(data)
def handle_startendtag(self, tag: str,
- attrs: list[tuple[str, Optional[str]]]):
+ attrs: List[Tuple[str, Optional[str]]]):
if tag in self.tag_required_blocklist | self.tag_blocklist:
meta = {k:v for k, v in attrs}
name = meta.get('name', 'harmful metadata')
@@ -184,7 +184,7 @@ class _HTMLParser(parser.HTMLParser):
f.write(self.__textrepr)
return True
- def get_meta(self) -> dict[str, Any]:
+ def get_meta(self) -> Dict[str, Any]:
if self.__validation_queue:
raise ValueError("Some tags (%s) were left unclosed in %s" % (
', '.join(self.__validation_queue),
=====================================
mat2
=====================================
@@ -2,6 +2,7 @@
import os
import shutil
+from typing import List, Set, Dict
import sys
import mimetypes
import argparse
@@ -16,7 +17,7 @@ except ValueError as ex:
print(ex)
sys.exit(1)
-__version__ = '0.13.1'
+__version__ = '0.13.2'
logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.WARNING)
@@ -35,7 +36,7 @@ def __check_file(filename: str, mode: int = os.R_OK) -> bool:
__print_without_chars("[-] %s is not a regular file." % filename)
return False
elif not os.access(filename, mode):
- mode_str = [] # type: list[str]
+ mode_str = [] # type: List[str]
if mode & os.R_OK:
mode_str += 'readable'
if mode & os.W_OK:
@@ -97,7 +98,7 @@ def show_meta(filename: str, sandbox: bool):
__print_meta(filename, p.get_meta())
-def __print_meta(filename: str, metadata: dict, depth: int = 1):
+def __print_meta(filename: str, metadata: Dict, depth: int = 1):
padding = " " * depth*2
if not metadata:
__print_without_chars(padding + "No metadata found in %s." % filename)
@@ -151,10 +152,10 @@ def clean_meta(filename: str, is_lightweight: bool, inplace: bool, sandbox: bool
def show_parsers():
print('[+] Supported formats:')
- formats = set() # set[str]
+ formats = set() # Set[str]
for parser in parser_factory._get_parsers(): # type: ignore
for mtype in parser.mimetypes:
- extensions = set() # set[str]
+ extensions = set() # Set[str]
for extension in mimetypes.guess_all_extensions(mtype):
if extension not in UNSUPPORTED_EXTENSIONS:
extensions.add(extension)
@@ -163,11 +164,11 @@ def show_parsers():
# mimetype, so there is no point in showing the mimetype at all
continue
formats.add(' - %s (%s)' % (mtype, ', '.join(extensions)))
- __print_without_chars('\n'.join(sorted(formats)))
+ print('\n'.join(sorted(formats)))
-def __get_files_recursively(files: list[str]) -> list[str]:
- ret = set() # type: set[str]
+def __get_files_recursively(files: List[str]) -> List[str]:
+ ret = set() # type: Set[str]
for f in files:
if os.path.isdir(f):
for path, _, _files in os.walk(f):
=====================================
setup.py
=====================================
@@ -5,7 +5,7 @@ with open("README.md", encoding='utf-8') as fh:
setuptools.setup(
name="mat2",
- version='0.13.1',
+ version='0.13.2',
author="Julien (jvoisin) Voisin",
author_email="julien.voisin+mat2 at dustri.org",
description="A handy tool to trash your metadata",
View it on GitLab: https://salsa.debian.org/pkg-privacy-team/mat2/-/commit/8e6a49aef7ad217ebbd4d3ca851ba822326ad237
--
View it on GitLab: https://salsa.debian.org/pkg-privacy-team/mat2/-/commit/8e6a49aef7ad217ebbd4d3ca851ba822326ad237
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/pkg-privacy-commits/attachments/20230128/c4555bf3/attachment-0001.htm>
More information about the Pkg-privacy-commits
mailing list