summaryrefslogtreecommitdiff
path: root/libmat2
diff options
context:
space:
mode:
Diffstat (limited to 'libmat2')
-rw-r--r--libmat2/__init__.py7
-rw-r--r--libmat2/abstract.py10
-rw-r--r--libmat2/archive.py27
-rw-r--r--libmat2/audio.py10
-rw-r--r--libmat2/bubblewrap.py6
-rw-r--r--libmat2/epub.py4
-rw-r--r--libmat2/exiftool.py9
-rw-r--r--libmat2/harmless.py4
-rw-r--r--libmat2/images.py9
-rw-r--r--libmat2/office.py11
-rw-r--r--libmat2/parser_factory.py6
-rw-r--r--libmat2/pdf.py6
-rw-r--r--libmat2/torrent.py12
-rw-r--r--libmat2/video.py8
-rw-r--r--libmat2/web.py19
15 files changed, 69 insertions, 79 deletions
diff --git a/libmat2/__init__.py b/libmat2/__init__.py
index ff899ff..762686f 100644
--- a/libmat2/__init__.py
+++ b/libmat2/__init__.py
@@ -2,12 +2,11 @@
2 2
3import enum 3import enum
4import importlib 4import importlib
5from typing import Dict, Optional, Union 5from typing import Optional, Union
6 6
7from . import exiftool, video 7from . import exiftool, video
8 8
9# make pyflakes happy 9# make pyflakes happy
10assert Dict
11assert Optional 10assert Optional
12assert Union 11assert Union
13 12
@@ -67,8 +66,8 @@ CMD_DEPENDENCIES = {
67 }, 66 },
68} 67}
69 68
70def check_dependencies() -> Dict[str, Dict[str, bool]]: 69def check_dependencies() -> dict[str, dict[str, bool]]:
71 ret = dict() # type: Dict[str, dict] 70 ret = dict() # type: dict[str, dict]
72 71
73 for key, value in DEPENDENCIES.items(): 72 for key, value in DEPENDENCIES.items():
74 ret[key] = { 73 ret[key] = {
diff --git a/libmat2/abstract.py b/libmat2/abstract.py
index 5cfd0f2..426ccfc 100644
--- a/libmat2/abstract.py
+++ b/libmat2/abstract.py
@@ -1,9 +1,7 @@
1import abc 1import abc
2import os 2import os
3import re 3import re
4from typing import Set, Dict, Union 4from typing import Union
5
6assert Set # make pyflakes happy
7 5
8 6
9class AbstractParser(abc.ABC): 7class AbstractParser(abc.ABC):
@@ -11,8 +9,8 @@ class AbstractParser(abc.ABC):
11 It might yield `ValueError` on instantiation on invalid files, 9 It might yield `ValueError` on instantiation on invalid files,
12 and `RuntimeError` when something went wrong in `remove_all`. 10 and `RuntimeError` when something went wrong in `remove_all`.
13 """ 11 """
14 meta_list = set() # type: Set[str] 12 meta_list = set() # type: set[str]
15 mimetypes = set() # type: Set[str] 13 mimetypes = set() # type: set[str]
16 14
17 def __init__(self, filename: str) -> None: 15 def __init__(self, filename: str) -> None:
18 """ 16 """
@@ -35,7 +33,7 @@ class AbstractParser(abc.ABC):
35 self.sandbox = True 33 self.sandbox = True
36 34
37 @abc.abstractmethod 35 @abc.abstractmethod
38 def get_meta(self) -> Dict[str, Union[str, dict]]: 36 def get_meta(self) -> dict[str, Union[str, dict]]:
39 """Return all the metadata of the current file""" 37 """Return all the metadata of the current file"""
40 38
41 @abc.abstractmethod 39 @abc.abstractmethod
diff --git a/libmat2/archive.py b/libmat2/archive.py
index 39fb23e..17ec608 100644
--- a/libmat2/archive.py
+++ b/libmat2/archive.py
@@ -7,12 +7,11 @@ import tempfile
7import os 7import os
8import logging 8import logging
9import shutil 9import shutil
10from typing import Dict, Set, Pattern, Union, Any, List 10from typing import Pattern, Union, Any
11 11
12from . import abstract, UnknownMemberPolicy, parser_factory 12from . import abstract, UnknownMemberPolicy, parser_factory
13 13
14# Make pyflakes happy 14# Make pyflakes happy
15assert Set
16assert Pattern 15assert Pattern
17 16
18# pylint: disable=not-callable,assignment-from-no-return,too-many-branches 17# pylint: disable=not-callable,assignment-from-no-return,too-many-branches
@@ -53,11 +52,11 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
53 52
54 # Those are the files that have a format that _isn't_ 53 # Those are the files that have a format that _isn't_
55 # supported by mat2, but that we want to keep anyway. 54 # supported by mat2, but that we want to keep anyway.
56 self.files_to_keep = set() # type: Set[Pattern] 55 self.files_to_keep = set() # type: set[Pattern]
57 56
58 # Those are the files that we _do not_ want to keep, 57 # Those are the files that we _do not_ want to keep,
59 # no matter if they are supported or not. 58 # no matter if they are supported or not.
60 self.files_to_omit = set() # type: Set[Pattern] 59 self.files_to_omit = set() # type: set[Pattern]
61 60
62 # what should the parser do if it encounters an unknown file in 61 # what should the parser do if it encounters an unknown file in
63 # the archive? 62 # the archive?
@@ -76,7 +75,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
76 # pylint: disable=unused-argument,no-self-use 75 # pylint: disable=unused-argument,no-self-use
77 return True # pragma: no cover 76 return True # pragma: no cover
78 77
79 def _specific_get_meta(self, full_path: str, file_path: str) -> Dict[str, Any]: 78 def _specific_get_meta(self, full_path: str, file_path: str) -> dict[str, Any]:
80 """ This method can be used to extract specific metadata 79 """ This method can be used to extract specific metadata
81 from files present in the archive.""" 80 from files present in the archive."""
82 # pylint: disable=unused-argument,no-self-use 81 # pylint: disable=unused-argument,no-self-use
@@ -91,7 +90,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
91 90
92 @staticmethod 91 @staticmethod
93 @abc.abstractmethod 92 @abc.abstractmethod
94 def _get_all_members(archive: ArchiveClass) -> List[ArchiveMember]: 93 def _get_all_members(archive: ArchiveClass) -> list[ArchiveMember]:
95 """Return all the members of the archive.""" 94 """Return all the members of the archive."""
96 95
97 @staticmethod 96 @staticmethod
@@ -101,7 +100,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
101 100
102 @staticmethod 101 @staticmethod
103 @abc.abstractmethod 102 @abc.abstractmethod
104 def _get_member_meta(member: ArchiveMember) -> Dict[str, str]: 103 def _get_member_meta(member: ArchiveMember) -> dict[str, str]:
105 """Return all the metadata of a given member.""" 104 """Return all the metadata of a given member."""
106 105
107 @staticmethod 106 @staticmethod
@@ -132,8 +131,8 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
132 # pylint: disable=unused-argument 131 # pylint: disable=unused-argument
133 return member 132 return member
134 133
135 def get_meta(self) -> Dict[str, Union[str, dict]]: 134 def get_meta(self) -> dict[str, Union[str, dict]]:
136 meta = dict() # type: Dict[str, Union[str, dict]] 135 meta = dict() # type: dict[str, Union[str, dict]]
137 136
138 with self.archive_class(self.filename) as zin: 137 with self.archive_class(self.filename) as zin:
139 temp_folder = tempfile.mkdtemp() 138 temp_folder = tempfile.mkdtemp()
@@ -174,7 +173,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
174 173
175 # Sort the items to process, to reduce fingerprinting, 174 # Sort the items to process, to reduce fingerprinting,
176 # and keep them in the `items` variable. 175 # and keep them in the `items` variable.
177 items = list() # type: List[ArchiveMember] 176 items = list() # type: list[ArchiveMember]
178 for item in sorted(self._get_all_members(zin), key=self._get_member_name): 177 for item in sorted(self._get_all_members(zin), key=self._get_member_name):
179 # Some fileformats do require to have the `mimetype` file 178 # Some fileformats do require to have the `mimetype` file
180 # as the first file in the archive. 179 # as the first file in the archive.
@@ -340,7 +339,7 @@ class TarParser(ArchiveBasedAbstractParser):
340 return member 339 return member
341 340
342 @staticmethod 341 @staticmethod
343 def _get_member_meta(member: ArchiveMember) -> Dict[str, str]: 342 def _get_member_meta(member: ArchiveMember) -> dict[str, str]:
344 assert isinstance(member, tarfile.TarInfo) # please mypy 343 assert isinstance(member, tarfile.TarInfo) # please mypy
345 metadata = {} 344 metadata = {}
346 if member.mtime != 0: 345 if member.mtime != 0:
@@ -362,7 +361,7 @@ class TarParser(ArchiveBasedAbstractParser):
362 archive.add(full_path, member.name, filter=TarParser._clean_member) # type: ignore 361 archive.add(full_path, member.name, filter=TarParser._clean_member) # type: ignore
363 362
364 @staticmethod 363 @staticmethod
365 def _get_all_members(archive: ArchiveClass) -> List[ArchiveMember]: 364 def _get_all_members(archive: ArchiveClass) -> list[ArchiveMember]:
366 assert isinstance(archive, tarfile.TarFile) # please mypy 365 assert isinstance(archive, tarfile.TarFile) # please mypy
367 return archive.getmembers() # type: ignore 366 return archive.getmembers() # type: ignore
368 367
@@ -416,7 +415,7 @@ class ZipParser(ArchiveBasedAbstractParser):
416 return member 415 return member
417 416
418 @staticmethod 417 @staticmethod
419 def _get_member_meta(member: ArchiveMember) -> Dict[str, str]: 418 def _get_member_meta(member: ArchiveMember) -> dict[str, str]:
420 assert isinstance(member, zipfile.ZipInfo) # please mypy 419 assert isinstance(member, zipfile.ZipInfo) # please mypy
421 metadata = {} 420 metadata = {}
422 if member.create_system == 3: # this is Linux 421 if member.create_system == 3: # this is Linux
@@ -443,7 +442,7 @@ class ZipParser(ArchiveBasedAbstractParser):
443 compress_type=member.compress_type) 442 compress_type=member.compress_type)
444 443
445 @staticmethod 444 @staticmethod
446 def _get_all_members(archive: ArchiveClass) -> List[ArchiveMember]: 445 def _get_all_members(archive: ArchiveClass) -> list[ArchiveMember]:
447 assert isinstance(archive, zipfile.ZipFile) # please mypy 446 assert isinstance(archive, zipfile.ZipFile) # please mypy
448 return archive.infolist() # type: ignore 447 return archive.infolist() # type: ignore
449 448
diff --git a/libmat2/audio.py b/libmat2/audio.py
index 598d93d..366d451 100644
--- a/libmat2/audio.py
+++ b/libmat2/audio.py
@@ -2,7 +2,7 @@ import mimetypes
2import os 2import os
3import shutil 3import shutil
4import tempfile 4import tempfile
5from typing import Dict, Union 5from typing import Union
6 6
7import mutagen 7import mutagen
8 8
@@ -18,7 +18,7 @@ class MutagenParser(abstract.AbstractParser):
18 except mutagen.MutagenError: 18 except mutagen.MutagenError:
19 raise ValueError 19 raise ValueError
20 20
21 def get_meta(self) -> Dict[str, Union[str, dict]]: 21 def get_meta(self) -> dict[str, Union[str, dict]]:
22 f = mutagen.File(self.filename) 22 f = mutagen.File(self.filename)
23 if f.tags: 23 if f.tags:
24 return {k:', '.join(map(str, v)) for k, v in f.tags.items()} 24 return {k:', '.join(map(str, v)) for k, v in f.tags.items()}
@@ -38,8 +38,8 @@ class MutagenParser(abstract.AbstractParser):
38class MP3Parser(MutagenParser): 38class MP3Parser(MutagenParser):
39 mimetypes = {'audio/mpeg', } 39 mimetypes = {'audio/mpeg', }
40 40
41 def get_meta(self) -> Dict[str, Union[str, dict]]: 41 def get_meta(self) -> dict[str, Union[str, dict]]:
42 metadata = {} # type: Dict[str, Union[str, dict]] 42 metadata = {} # type: dict[str, Union[str, dict]]
43 meta = mutagen.File(self.filename).tags 43 meta = mutagen.File(self.filename).tags
44 if not meta: 44 if not meta:
45 return metadata 45 return metadata
@@ -68,7 +68,7 @@ class FLACParser(MutagenParser):
68 f.save(deleteid3=True) 68 f.save(deleteid3=True)
69 return True 69 return True
70 70
71 def get_meta(self) -> Dict[str, Union[str, dict]]: 71 def get_meta(self) -> dict[str, Union[str, dict]]:
72 meta = super().get_meta() 72 meta = super().get_meta()
73 for num, picture in enumerate(mutagen.File(self.filename).pictures): 73 for num, picture in enumerate(mutagen.File(self.filename).pictures):
74 name = picture.desc if picture.desc else 'Cover %d' % num 74 name = picture.desc if picture.desc else 'Cover %d' % num
diff --git a/libmat2/bubblewrap.py b/libmat2/bubblewrap.py
index 1bb0f82..0e202b9 100644
--- a/libmat2/bubblewrap.py
+++ b/libmat2/bubblewrap.py
@@ -12,7 +12,7 @@ import shutil
12import subprocess 12import subprocess
13import tempfile 13import tempfile
14import functools 14import functools
15from typing import List, Optional 15from typing import Optional
16 16
17 17
18__all__ = ['PIPE', 'run', 'CalledProcessError'] 18__all__ = ['PIPE', 'run', 'CalledProcessError']
@@ -33,7 +33,7 @@ def _get_bwrap_path() -> str:
33 33
34def _get_bwrap_args(tempdir: str, 34def _get_bwrap_args(tempdir: str,
35 input_filename: str, 35 input_filename: str,
36 output_filename: Optional[str] = None) -> List[str]: 36 output_filename: Optional[str] = None) -> list[str]:
37 ro_bind_args = [] 37 ro_bind_args = []
38 cwd = os.getcwd() 38 cwd = os.getcwd()
39 39
@@ -78,7 +78,7 @@ def _get_bwrap_args(tempdir: str,
78 return args 78 return args
79 79
80 80
81def run(args: List[str], 81def run(args: list[str],
82 input_filename: str, 82 input_filename: str,
83 output_filename: Optional[str] = None, 83 output_filename: Optional[str] = None,
84 **kwargs) -> subprocess.CompletedProcess: 84 **kwargs) -> subprocess.CompletedProcess:
diff --git a/libmat2/epub.py b/libmat2/epub.py
index be05562..7613d35 100644
--- a/libmat2/epub.py
+++ b/libmat2/epub.py
@@ -3,7 +3,7 @@ import re
3import uuid 3import uuid
4import zipfile 4import zipfile
5import xml.etree.ElementTree as ET # type: ignore 5import xml.etree.ElementTree as ET # type: ignore
6from typing import Dict, Any 6from typing import Any
7 7
8from . import archive, office 8from . import archive, office
9 9
@@ -37,7 +37,7 @@ class EPUBParser(archive.ZipParser):
37 if member_name.endswith('META-INF/encryption.xml'): 37 if member_name.endswith('META-INF/encryption.xml'):
38 raise ValueError('the file contains encrypted fonts') 38 raise ValueError('the file contains encrypted fonts')
39 39
40 def _specific_get_meta(self, full_path, file_path) -> Dict[str, Any]: 40 def _specific_get_meta(self, full_path, file_path) -> dict[str, Any]:
41 if not file_path.endswith('.opf'): 41 if not file_path.endswith('.opf'):
42 return {} 42 return {}
43 43
diff --git a/libmat2/exiftool.py b/libmat2/exiftool.py
index 933ea63..cdfce3d 100644
--- a/libmat2/exiftool.py
+++ b/libmat2/exiftool.py
@@ -4,23 +4,20 @@ import logging
4import os 4import os
5import shutil 5import shutil
6import subprocess 6import subprocess
7from typing import Dict, Union, Set 7from typing import Union
8 8
9from . import abstract 9from . import abstract
10from . import bubblewrap 10from . import bubblewrap
11 11
12# Make pyflakes happy
13assert Set
14
15 12
16class ExiftoolParser(abstract.AbstractParser): 13class ExiftoolParser(abstract.AbstractParser):
17 """ Exiftool is often the easiest way to get all the metadata 14 """ Exiftool is often the easiest way to get all the metadata
18 from a import file, hence why several parsers are re-using its `get_meta` 15 from a import file, hence why several parsers are re-using its `get_meta`
19 method. 16 method.
20 """ 17 """
21 meta_allowlist = set() # type: Set[str] 18 meta_allowlist = set() # type: set[str]
22 19
23 def get_meta(self) -> Dict[str, Union[str, dict]]: 20 def get_meta(self) -> dict[str, Union[str, dict]]:
24 try: 21 try:
25 if self.sandbox: 22 if self.sandbox:
26 out = bubblewrap.run([_get_exiftool_path(), '-json', 23 out = bubblewrap.run([_get_exiftool_path(), '-json',
diff --git a/libmat2/harmless.py b/libmat2/harmless.py
index fad0ef8..8688a9d 100644
--- a/libmat2/harmless.py
+++ b/libmat2/harmless.py
@@ -1,5 +1,5 @@
1import shutil 1import shutil
2from typing import Dict, Union 2from typing import Union
3from . import abstract 3from . import abstract
4 4
5 5
@@ -7,7 +7,7 @@ class HarmlessParser(abstract.AbstractParser):
7 """ This is the parser for filetypes that can not contain metadata. """ 7 """ This is the parser for filetypes that can not contain metadata. """
8 mimetypes = {'text/plain', 'image/x-ms-bmp'} 8 mimetypes = {'text/plain', 'image/x-ms-bmp'}
9 9
10 def get_meta(self) -> Dict[str, Union[str, dict]]: 10 def get_meta(self) -> dict[str, Union[str, dict]]:
11 return dict() 11 return dict()
12 12
13 def remove_all(self) -> bool: 13 def remove_all(self) -> bool:
diff --git a/libmat2/images.py b/libmat2/images.py
index 9c24998..37ed36c 100644
--- a/libmat2/images.py
+++ b/libmat2/images.py
@@ -1,7 +1,7 @@
1import imghdr 1import imghdr
2import os 2import os
3import re 3import re
4from typing import Set, Dict, Union, Any 4from typing import Union, Any
5 5
6import cairo 6import cairo
7 7
@@ -13,7 +13,6 @@ from gi.repository import GdkPixbuf, GLib, Rsvg
13from . import exiftool, abstract 13from . import exiftool, abstract
14 14
15# Make pyflakes happy 15# Make pyflakes happy
16assert Set
17assert Any 16assert Any
18 17
19class SVGParser(exiftool.ExiftoolParser): 18class SVGParser(exiftool.ExiftoolParser):
@@ -50,7 +49,7 @@ class SVGParser(exiftool.ExiftoolParser):
50 surface.finish() 49 surface.finish()
51 return True 50 return True
52 51
53 def get_meta(self) -> Dict[str, Union[str, dict]]: 52 def get_meta(self) -> dict[str, Union[str, dict]]:
54 meta = super().get_meta() 53 meta = super().get_meta()
55 54
56 # The namespace is mandatory, but only the …/2000/svg is valid. 55 # The namespace is mandatory, but only the …/2000/svg is valid.
@@ -165,8 +164,8 @@ class TiffParser(GdkPixbufAbstractParser):
165class PPMParser(abstract.AbstractParser): 164class PPMParser(abstract.AbstractParser):
166 mimetypes = {'image/x-portable-pixmap'} 165 mimetypes = {'image/x-portable-pixmap'}
167 166
168 def get_meta(self) -> Dict[str, Union[str, dict]]: 167 def get_meta(self) -> dict[str, Union[str, dict]]:
169 meta = {} # type: Dict[str, Union[str, Dict[Any, Any]]] 168 meta = {} # type: dict[str, Union[str, dict[Any, Any]]]
170 with open(self.filename) as f: 169 with open(self.filename) as f:
171 for idx, line in enumerate(f): 170 for idx, line in enumerate(f):
172 if line.lstrip().startswith('#'): 171 if line.lstrip().startswith('#'):
diff --git a/libmat2/office.py b/libmat2/office.py
index d7eca11..ae6ef39 100644
--- a/libmat2/office.py
+++ b/libmat2/office.py
@@ -4,7 +4,7 @@ import logging
4import os 4import os
5import re 5import re
6import zipfile 6import zipfile
7from typing import Dict, Set, Pattern, Tuple, Any 7from typing import Pattern, Any
8 8
9import xml.etree.ElementTree as ET # type: ignore 9import xml.etree.ElementTree as ET # type: ignore
10 10
@@ -13,10 +13,9 @@ from .archive import ZipParser
13# pylint: disable=line-too-long 13# pylint: disable=line-too-long
14 14
15# Make pyflakes happy 15# Make pyflakes happy
16assert Set
17assert Pattern 16assert Pattern
18 17
19def _parse_xml(full_path: str) -> Tuple[ET.ElementTree, Dict[str, str]]: 18def _parse_xml(full_path: str) -> tuple[ET.ElementTree, dict[str, str]]:
20 """ This function parses XML, with namespace support. """ 19 """ This function parses XML, with namespace support. """
21 namespace_map = dict() 20 namespace_map = dict()
22 for _, (key, value) in ET.iterparse(full_path, ("start-ns", )): 21 for _, (key, value) in ET.iterparse(full_path, ("start-ns", )):
@@ -148,7 +147,7 @@ class MSOfficeParser(ZipParser):
148 return False 147 return False
149 xml_data = zin.read('[Content_Types].xml') 148 xml_data = zin.read('[Content_Types].xml')
150 149
151 self.content_types = dict() # type: Dict[str, str] 150 self.content_types = dict() # type: dict[str, str]
152 try: 151 try:
153 tree = ET.fromstring(xml_data) 152 tree = ET.fromstring(xml_data)
154 except ET.ParseError: 153 except ET.ParseError:
@@ -431,7 +430,7 @@ class MSOfficeParser(ZipParser):
431 430
432 return True 431 return True
433 432
434 def _specific_get_meta(self, full_path: str, file_path: str) -> Dict[str, Any]: 433 def _specific_get_meta(self, full_path: str, file_path: str) -> dict[str, Any]:
435 """ 434 """
436 Yes, I know that parsing xml with regexp ain't pretty, 435 Yes, I know that parsing xml with regexp ain't pretty,
437 be my guest and fix it if you want. 436 be my guest and fix it if you want.
@@ -512,7 +511,7 @@ class LibreOfficeParser(ZipParser):
512 return False 511 return False
513 return True 512 return True
514 513
515 def _specific_get_meta(self, full_path: str, file_path: str) -> Dict[str, Any]: 514 def _specific_get_meta(self, full_path: str, file_path: str) -> dict[str, Any]:
516 """ 515 """
517 Yes, I know that parsing xml with regexp ain't pretty, 516 Yes, I know that parsing xml with regexp ain't pretty,
518 be my guest and fix it if you want. 517 be my guest and fix it if you want.
diff --git a/libmat2/parser_factory.py b/libmat2/parser_factory.py
index a539d12..4527b88 100644
--- a/libmat2/parser_factory.py
+++ b/libmat2/parser_factory.py
@@ -2,7 +2,7 @@ import glob
2import os 2import os
3import mimetypes 3import mimetypes
4import importlib 4import importlib
5from typing import TypeVar, List, Tuple, Optional 5from typing import TypeVar, Optional
6 6
7from . import abstract, UNSUPPORTED_EXTENSIONS 7from . import abstract, UNSUPPORTED_EXTENSIONS
8 8
@@ -34,7 +34,7 @@ def __load_all_parsers():
34__load_all_parsers() 34__load_all_parsers()
35 35
36 36
37def _get_parsers() -> List[T]: 37def _get_parsers() -> list[T]:
38 """ Get all our parsers!""" 38 """ Get all our parsers!"""
39 def __get_parsers(cls): 39 def __get_parsers(cls):
40 return cls.__subclasses__() + \ 40 return cls.__subclasses__() + \
@@ -42,7 +42,7 @@ def _get_parsers() -> List[T]:
42 return __get_parsers(abstract.AbstractParser) 42 return __get_parsers(abstract.AbstractParser)
43 43
44 44
45def get_parser(filename: str) -> Tuple[Optional[T], Optional[str]]: 45def get_parser(filename: str) -> tuple[Optional[T], Optional[str]]:
46 """ Return the appropriate parser for a given filename. 46 """ Return the appropriate parser for a given filename.
47 47
48 :raises ValueError: Raised if the instantiation of the parser went wrong. 48 :raises ValueError: Raised if the instantiation of the parser went wrong.
diff --git a/libmat2/pdf.py b/libmat2/pdf.py
index b79c7c7..24c280c 100644
--- a/libmat2/pdf.py
+++ b/libmat2/pdf.py
@@ -7,7 +7,7 @@ import re
7import logging 7import logging
8import tempfile 8import tempfile
9import io 9import io
10from typing import Dict, Union 10from typing import Union
11from distutils.version import LooseVersion 11from distutils.version import LooseVersion
12 12
13import cairo 13import cairo
@@ -146,13 +146,13 @@ class PDFParser(abstract.AbstractParser):
146 return True 146 return True
147 147
148 @staticmethod 148 @staticmethod
149 def __parse_metadata_field(data: str) -> Dict[str, str]: 149 def __parse_metadata_field(data: str) -> dict[str, str]:
150 metadata = {} 150 metadata = {}
151 for (_, key, value) in re.findall(r"<(xmp|pdfx|pdf|xmpMM):(.+)>(.+)</\1:\2>", data, re.I): 151 for (_, key, value) in re.findall(r"<(xmp|pdfx|pdf|xmpMM):(.+)>(.+)</\1:\2>", data, re.I):
152 metadata[key] = value 152 metadata[key] = value
153 return metadata 153 return metadata
154 154
155 def get_meta(self) -> Dict[str, Union[str, dict]]: 155 def get_meta(self) -> dict[str, Union[str, dict]]:
156 """ Return a dict with all the meta of the file 156 """ Return a dict with all the meta of the file
157 """ 157 """
158 metadata = {} 158 metadata = {}
diff --git a/libmat2/torrent.py b/libmat2/torrent.py
index 1a82740..c547a20 100644
--- a/libmat2/torrent.py
+++ b/libmat2/torrent.py
@@ -1,5 +1,5 @@
1import logging 1import logging
2from typing import Union, Tuple, Dict 2from typing import Union
3 3
4from . import abstract 4from . import abstract
5 5
@@ -15,7 +15,7 @@ class TorrentParser(abstract.AbstractParser):
15 if self.dict_repr is None: 15 if self.dict_repr is None:
16 raise ValueError 16 raise ValueError
17 17
18 def get_meta(self) -> Dict[str, Union[str, dict]]: 18 def get_meta(self) -> dict[str, Union[str, dict]]:
19 metadata = {} 19 metadata = {}
20 for key, value in self.dict_repr.items(): 20 for key, value in self.dict_repr.items():
21 if key not in self.allowlist: 21 if key not in self.allowlist:
@@ -56,7 +56,7 @@ class _BencodeHandler:
56 } 56 }
57 57
58 @staticmethod 58 @staticmethod
59 def __decode_int(s: bytes) -> Tuple[int, bytes]: 59 def __decode_int(s: bytes) -> tuple[int, bytes]:
60 s = s[1:] 60 s = s[1:]
61 next_idx = s.index(b'e') 61 next_idx = s.index(b'e')
62 if s.startswith(b'-0'): 62 if s.startswith(b'-0'):
@@ -66,7 +66,7 @@ class _BencodeHandler:
66 return int(s[:next_idx]), s[next_idx+1:] 66 return int(s[:next_idx]), s[next_idx+1:]
67 67
68 @staticmethod 68 @staticmethod
69 def __decode_string(s: bytes) -> Tuple[bytes, bytes]: 69 def __decode_string(s: bytes) -> tuple[bytes, bytes]:
70 colon = s.index(b':') 70 colon = s.index(b':')
71 # FIXME Python3 is broken here, the call to `ord` shouldn't be needed, 71 # FIXME Python3 is broken here, the call to `ord` shouldn't be needed,
72 # but apparently it is. This is utterly idiotic. 72 # but apparently it is. This is utterly idiotic.
@@ -76,7 +76,7 @@ class _BencodeHandler:
76 s = s[1:] 76 s = s[1:]
77 return s[colon:colon+str_len], s[colon+str_len:] 77 return s[colon:colon+str_len], s[colon+str_len:]
78 78
79 def __decode_list(self, s: bytes) -> Tuple[list, bytes]: 79 def __decode_list(self, s: bytes) -> tuple[list, bytes]:
80 ret = list() 80 ret = list()
81 s = s[1:] # skip leading `l` 81 s = s[1:] # skip leading `l`
82 while s[0] != ord('e'): 82 while s[0] != ord('e'):
@@ -84,7 +84,7 @@ class _BencodeHandler:
84 ret.append(value) 84 ret.append(value)
85 return ret, s[1:] 85 return ret, s[1:]
86 86
87 def __decode_dict(self, s: bytes) -> Tuple[dict, bytes]: 87 def __decode_dict(self, s: bytes) -> tuple[dict, bytes]:
88 ret = dict() 88 ret = dict()
89 s = s[1:] # skip leading `d` 89 s = s[1:] # skip leading `d`
90 while s[0] != ord(b'e'): 90 while s[0] != ord(b'e'):
diff --git a/libmat2/video.py b/libmat2/video.py
index ae9e463..1b38c77 100644
--- a/libmat2/video.py
+++ b/libmat2/video.py
@@ -3,7 +3,7 @@ import functools
3import shutil 3import shutil
4import logging 4import logging
5 5
6from typing import Dict, Union 6from typing import Union
7 7
8from . import exiftool 8from . import exiftool
9from . import bubblewrap 9from . import bubblewrap
@@ -12,7 +12,7 @@ from . import bubblewrap
12class AbstractFFmpegParser(exiftool.ExiftoolParser): 12class AbstractFFmpegParser(exiftool.ExiftoolParser):
13 """ Abstract parser for all FFmpeg-based ones, mainly for video. """ 13 """ Abstract parser for all FFmpeg-based ones, mainly for video. """
14 # Some fileformats have mandatory metadata fields 14 # Some fileformats have mandatory metadata fields
15 meta_key_value_allowlist = {} # type: Dict[str, Union[str, int]] 15 meta_key_value_allowlist = {} # type: dict[str, Union[str, int]]
16 16
17 def remove_all(self) -> bool: 17 def remove_all(self) -> bool:
18 if self.meta_key_value_allowlist: 18 if self.meta_key_value_allowlist:
@@ -45,10 +45,10 @@ class AbstractFFmpegParser(exiftool.ExiftoolParser):
45 return False 45 return False
46 return True 46 return True
47 47
48 def get_meta(self) -> Dict[str, Union[str, dict]]: 48 def get_meta(self) -> dict[str, Union[str, dict]]:
49 meta = super().get_meta() 49 meta = super().get_meta()
50 50
51 ret = dict() # type: Dict[str, Union[str, dict]] 51 ret = dict() # type: dict[str, Union[str, dict]]
52 for key, value in meta.items(): 52 for key, value in meta.items():
53 if key in self.meta_key_value_allowlist: 53 if key in self.meta_key_value_allowlist:
54 if value == self.meta_key_value_allowlist[key]: 54 if value == self.meta_key_value_allowlist[key]:
diff --git a/libmat2/web.py b/libmat2/web.py
index 2f331d8..d324996 100644
--- a/libmat2/web.py
+++ b/libmat2/web.py
@@ -1,11 +1,10 @@
1from html import parser, escape 1from html import parser, escape
2from typing import Dict, Any, List, Tuple, Set, Optional 2from typing import Any, Optional
3import re 3import re
4import string 4import string
5 5
6from . import abstract 6from . import abstract
7 7
8assert Set
9 8
10# pylint: disable=too-many-instance-attributes 9# pylint: disable=too-many-instance-attributes
11 10
@@ -26,7 +25,7 @@ class CSSParser(abstract.AbstractParser):
26 f.write(cleaned) 25 f.write(cleaned)
27 return True 26 return True
28 27
29 def get_meta(self) -> Dict[str, Any]: 28 def get_meta(self) -> dict[str, Any]:
30 metadata = {} 29 metadata = {}
31 with open(self.filename, encoding='utf-8') as f: 30 with open(self.filename, encoding='utf-8') as f:
32 try: 31 try:
@@ -45,10 +44,10 @@ class CSSParser(abstract.AbstractParser):
45 44
46 45
47class AbstractHTMLParser(abstract.AbstractParser): 46class AbstractHTMLParser(abstract.AbstractParser):
48 tags_blocklist = set() # type: Set[str] 47 tags_blocklist = set() # type: set[str]
49 # In some html/xml-based formats some tags are mandatory, 48 # In some html/xml-based formats some tags are mandatory,
50 # so we're keeping them, but are discarding their content 49 # so we're keeping them, but are discarding their content
51 tags_required_blocklist = set() # type: Set[str] 50 tags_required_blocklist = set() # type: set[str]
52 51
53 def __init__(self, filename): 52 def __init__(self, filename):
54 super().__init__(filename) 53 super().__init__(filename)
@@ -58,7 +57,7 @@ class AbstractHTMLParser(abstract.AbstractParser):
58 self.__parser.feed(f.read()) 57 self.__parser.feed(f.read())
59 self.__parser.close() 58 self.__parser.close()
60 59
61 def get_meta(self) -> Dict[str, Any]: 60 def get_meta(self) -> dict[str, Any]:
62 return self.__parser.get_meta() 61 return self.__parser.get_meta()
63 62
64 def remove_all(self) -> bool: 63 def remove_all(self) -> bool:
@@ -92,7 +91,7 @@ class _HTMLParser(parser.HTMLParser):
92 self.filename = filename 91 self.filename = filename
93 self.__textrepr = '' 92 self.__textrepr = ''
94 self.__meta = {} 93 self.__meta = {}
95 self.__validation_queue = [] # type: List[str] 94 self.__validation_queue = [] # type: list[str]
96 95
97 # We're using counters instead of booleans, to handle nested tags 96 # We're using counters instead of booleans, to handle nested tags
98 self.__in_dangerous_but_required_tag = 0 97 self.__in_dangerous_but_required_tag = 0
@@ -114,7 +113,7 @@ class _HTMLParser(parser.HTMLParser):
114 """ 113 """
115 raise ValueError(message) 114 raise ValueError(message)
116 115
117 def handle_starttag(self, tag: str, attrs: List[Tuple[str, Optional[str]]]): 116 def handle_starttag(self, tag: str, attrs: list[tuple[str, Optional[str]]]):
118 # Ignore the type, because mypy is too stupid to infer 117 # Ignore the type, because mypy is too stupid to infer
119 # that get_starttag_text() can't return None. 118 # that get_starttag_text() can't return None.
120 original_tag = self.get_starttag_text() # type: ignore 119 original_tag = self.get_starttag_text() # type: ignore
@@ -161,7 +160,7 @@ class _HTMLParser(parser.HTMLParser):
161 self.__textrepr += escape(data) 160 self.__textrepr += escape(data)
162 161
163 def handle_startendtag(self, tag: str, 162 def handle_startendtag(self, tag: str,
164 attrs: List[Tuple[str, Optional[str]]]): 163 attrs: list[tuple[str, Optional[str]]]):
165 if tag in self.tag_required_blocklist | self.tag_blocklist: 164 if tag in self.tag_required_blocklist | self.tag_blocklist:
166 meta = {k:v for k, v in attrs} 165 meta = {k:v for k, v in attrs}
167 name = meta.get('name', 'harmful metadata') 166 name = meta.get('name', 'harmful metadata')
@@ -186,7 +185,7 @@ class _HTMLParser(parser.HTMLParser):
186 f.write(self.__textrepr) 185 f.write(self.__textrepr)
187 return True 186 return True
188 187
189 def get_meta(self) -> Dict[str, Any]: 188 def get_meta(self) -> dict[str, Any]:
190 if self.__validation_queue: 189 if self.__validation_queue:
191 raise ValueError("Some tags (%s) were left unclosed in %s" % ( 190 raise ValueError("Some tags (%s) were left unclosed in %s" % (
192 ', '.join(self.__validation_queue), 191 ', '.join(self.__validation_queue),