6 files changed, 39 insertions, 39 deletions
diff --git a/libmat2/exiftool.py b/libmat2/exiftool.py
index db92f60..d18b1fb 100644
--- a/libmat2/exiftool.py
+++ b/libmat2/exiftool.py
@@ -15,14 +15,14 @@ class ExiftoolParser(abstract.AbstractParser):
    from a import file, hence why several parsers are re-using its `get_meta`
    method.
    """
-    meta_whitelist = set()  # type: Set[str]
+    meta_allowlist = set()  # type: Set[str]
    def get_meta(self) -> Dict[str, Union[str, dict]]:
        out = subprocess.run([_get_exiftool_path(), '-json', self.filename],
                             input_filename=self.filename,
                             check=True, stdout=subprocess.PIPE).stdout
        meta = json.loads(out.decode('utf-8'))[0]
-        for key in self.meta_whitelist:
+        for key in self.meta_allowlist:
            meta.pop(key, None)
        return meta
diff --git a/libmat2/images.py b/libmat2/images.py
index dd3be53..32a329f 100644
--- a/libmat2/images.py
+++ b/libmat2/images.py
@@ -15,7 +15,7 @@ assert Set
 class PNGParser(exiftool.ExiftoolParser):
    mimetypes = {'image/png', }
-    meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName',
+    meta_allowlist = {'SourceFile', 'ExifToolVersion', 'FileName',
                      'Directory', 'FileSize', 'FileModifyDate',
                      'FileAccessDate', 'FileInodeChangeDate',
                      'FilePermissions', 'FileType', 'FileTypeExtension',
@@ -44,7 +44,7 @@ class PNGParser(exiftool.ExiftoolParser):
 class GIFParser(exiftool.ExiftoolParser):
    mimetypes = {'image/gif'}
-    meta_whitelist = {'AnimationIterations', 'BackgroundColor', 'BitsPerPixel',
+    meta_allowlist = {'AnimationIterations', 'BackgroundColor', 'BitsPerPixel',
                      'ColorResolutionDepth', 'Directory', 'Duration',
                      'ExifToolVersion', 'FileAccessDate',
                      'FileInodeChangeDate', 'FileModifyDate', 'FileName',
@@ -86,7 +86,7 @@ class GdkPixbufAbstractParser(exiftool.ExiftoolParser):
 class JPGParser(GdkPixbufAbstractParser):
    _type = 'jpeg'
    mimetypes = {'image/jpeg'}
-    meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName',
+    meta_allowlist = {'SourceFile', 'ExifToolVersion', 'FileName',
                      'Directory', 'FileSize', 'FileModifyDate',
                      'FileAccessDate', "FileInodeChangeDate",
                      'FilePermissions', 'FileType', 'FileTypeExtension',
@@ -99,7 +99,7 @@ class JPGParser(GdkPixbufAbstractParser):
 class TiffParser(GdkPixbufAbstractParser):
    _type = 'tiff'
    mimetypes = {'image/tiff'}
-    meta_whitelist = {'Compression', 'ExifByteOrder', 'ExtraSamples',
+    meta_allowlist = {'Compression', 'ExifByteOrder', 'ExtraSamples',
                      'FillOrder', 'PhotometricInterpretation',
                      'PlanarConfiguration', 'RowsPerStrip', 'SamplesPerPixel',
                      'StripByteCounts', 'StripOffsets', 'BitsPerSample',
diff --git a/libmat2/office.py b/libmat2/office.py
index f3a5b22..2c9cbff 100644
--- a/libmat2/office.py
+++ b/libmat2/office.py
@@ -89,7 +89,7 @@ class MSOfficeParser(ArchiveBasedAbstractParser):
            r'^word/theme',
            r'^word/people\.xml$',
-            # we have a whitelist in self.files_to_keep,
+            # we have an allowlist in self.files_to_keep,
            # so we can trash everything else
            r'^word/_rels/',
        }))
@@ -100,7 +100,7 @@ class MSOfficeParser(ArchiveBasedAbstractParser):
    def __fill_files_to_keep_via_content_types(self) -> bool:
        """ There is a suer-handy `[Content_Types].xml` file
        in MS Office archives, describing what each other file contains.
-        The self.content_types_to_keep member contains a type whitelist,
+        The self.content_types_to_keep member contains a type allowlist,
        so we're using it to fill the self.files_to_keep one.
        """
        with zipfile.ZipFile(self.filename) as zin:
@@ -220,7 +220,7 @@ class MSOfficeParser(ArchiveBasedAbstractParser):
                for file_to_omit in self.files_to_omit:
                    if file_to_omit.search(fname):
                        matches = map(lambda r: r.search(fname), self.files_to_keep)
-                        if any(matches):  # the file is whitelisted
+                        if any(matches):  # the file is in the allowlist
                            continue
                        removed_fnames.add(fname)
                        break
diff --git a/libmat2/torrent.py b/libmat2/torrent.py
index c006f9c..6021d75 100644
--- a/libmat2/torrent.py
+++ b/libmat2/torrent.py
@@ -6,7 +6,7 @@ from . import abstract
 class TorrentParser(abstract.AbstractParser):
    mimetypes = {'application/x-bittorrent', }
-    whitelist = {b'announce', b'announce-list', b'info'}
+    allowlist = {b'announce', b'announce-list', b'info'}
    def __init__(self, filename):
        super().__init__(filename)
@@ -18,14 +18,14 @@ class TorrentParser(abstract.AbstractParser):
    def get_meta(self) -> Dict[str, Union[str, dict]]:
        metadata = {}
        for key, value in self.dict_repr.items():
-            if key not in self.whitelist:
+            if key not in self.allowlist:
                metadata[key.decode('utf-8')] = value
        return metadata
    def remove_all(self) -> bool:
        cleaned = dict()
        for key, value in self.dict_repr.items():
-            if key in self.whitelist:
+            if key in self.allowlist:
                cleaned[key] = value
        with open(self.output_filename, 'wb') as f:
            f.write(_BencodeHandler().bencode(cleaned))
diff --git a/libmat2/video.py b/libmat2/video.py
index 4f15b19..0060f78 100644
--- a/libmat2/video.py
+++ b/libmat2/video.py
@@ -10,10 +10,10 @@ from . import subprocess
 class AbstractFFmpegParser(exiftool.ExiftoolParser):
    """ Abstract parser for all FFmpeg-based ones, mainly for video. """
    # Some fileformats have mandatory metadata fields
-    meta_key_value_whitelist = {}  # type: Dict[str, Union[str, int]]
+    meta_key_value_allowlist = {}  # type: Dict[str, Union[str, int]]
    def remove_all(self) -> bool:
-        if self.meta_key_value_whitelist:
+        if self.meta_key_value_allowlist:
            logging.warning('The format of "%s" (%s) has some mandatory '
                            'metadata fields; mat2 filled them with standard '
                            'data.', self.filename, ', '.join(self.mimetypes))
@@ -45,8 +45,8 @@ class AbstractFFmpegParser(exiftool.ExiftoolParser):
        ret = dict()  # type: Dict[str, Union[str, dict]]
        for key, value in meta.items():
-            if key in self.meta_key_value_whitelist.keys():
+            if key in self.meta_key_value_allowlist.keys():
-                if value == self.meta_key_value_whitelist[key]:
+                if value == self.meta_key_value_allowlist[key]:
                    continue
            ret[key] = value
        return ret
@@ -54,7 +54,7 @@ class AbstractFFmpegParser(exiftool.ExiftoolParser):
 class WMVParser(AbstractFFmpegParser):
    mimetypes = {'video/x-ms-wmv', }
-    meta_whitelist = {'AudioChannels', 'AudioCodecID', 'AudioCodecName',
+    meta_allowlist = {'AudioChannels', 'AudioCodecID', 'AudioCodecName',
                      'ErrorCorrectionType', 'AudioSampleRate', 'DataPackets',
                      'Directory', 'Duration', 'ExifToolVersion',
                      'FileAccessDate', 'FileInodeChangeDate', 'FileLength',
@@ -64,7 +64,7 @@ class WMVParser(AbstractFFmpegParser):
                      'ImageWidth', 'MIMEType', 'MaxBitrate', 'MaxPacketSize',
                      'Megapixels', 'MinPacketSize', 'Preroll', 'SendDuration',
                      'SourceFile', 'StreamNumber', 'VideoCodecName', }
-    meta_key_value_whitelist = {  # some metadata are mandatory :/
+    meta_key_value_allowlist = {  # some metadata are mandatory :/
        'AudioCodecDescription': '',
        'CreationDate': '0000:00:00 00:00:00Z',
        'FileID': '00000000-0000-0000-0000-000000000000',
@@ -78,7 +78,7 @@ class WMVParser(AbstractFFmpegParser):
 class AVIParser(AbstractFFmpegParser):
    mimetypes = {'video/x-msvideo', }
-    meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName', 'Directory',
+    meta_allowlist = {'SourceFile', 'ExifToolVersion', 'FileName', 'Directory',
                      'FileSize', 'FileModifyDate', 'FileAccessDate',
                      'FileInodeChangeDate', 'FilePermissions', 'FileType',
                      'FileTypeExtension', 'MIMEType', 'FrameRate', 'MaxDataRate',
@@ -98,7 +98,7 @@ class AVIParser(AbstractFFmpegParser):
 class MP4Parser(AbstractFFmpegParser):
    mimetypes = {'video/mp4', }
-    meta_whitelist = {'AudioFormat', 'AvgBitrate', 'Balance', 'TrackDuration',
+    meta_allowlist = {'AudioFormat', 'AvgBitrate', 'Balance', 'TrackDuration',
                      'XResolution', 'YResolution', 'ExifToolVersion',
                      'FileAccessDate', 'FileInodeChangeDate', 'FileModifyDate',
                      'FileName', 'FilePermissions', 'MIMEType', 'FileType',
@@ -109,7 +109,7 @@ class MP4Parser(AbstractFFmpegParser):
                      'MovieDataSize', 'VideoFrameRate', 'MediaTimeScale',
                      'SourceImageHeight', 'SourceImageWidth',
                      'MatrixStructure', 'MediaDuration'}
-    meta_key_value_whitelist = {  # some metadata are mandatory :/
+    meta_key_value_allowlist = {  # some metadata are mandatory :/
        'CreateDate': '0000:00:00 00:00:00',
        'CurrentTime': '0 s',
        'MediaCreateDate': '0000:00:00 00:00:00',
diff --git a/libmat2/web.py b/libmat2/web.py
index 34426b8..0a61908 100644
--- a/libmat2/web.py
+++ b/libmat2/web.py
@@ -37,15 +37,15 @@ class CSSParser(abstract.AbstractParser):
 class AbstractHTMLParser(abstract.AbstractParser):
-    tags_blacklist = set()  # type: Set[str]
+    tags_blocklist = set()  # type: Set[str]
    # In some html/xml-based formats some tags are mandatory,
    # so we're keeping them, but are discarding their content
-    tags_required_blacklist = set()  # type: Set[str]
+    tags_required_blocklist = set()  # type: Set[str]
    def __init__(self, filename):
        super().__init__(filename)
-        self.__parser = _HTMLParser(self.filename, self.tags_blacklist,
+        self.__parser = _HTMLParser(self.filename, self.tags_blocklist,
-                                    self.tags_required_blacklist)
+                                    self.tags_required_blocklist)
        with open(filename, encoding='utf-8') as f:
            self.__parser.feed(f.read())
        self.__parser.close()
@@ -59,13 +59,13 @@ class AbstractHTMLParser(abstract.AbstractParser):
 class HTMLParser(AbstractHTMLParser):
    mimetypes = {'text/html', }
-    tags_blacklist = {'meta', }
+    tags_blocklist = {'meta', }
-    tags_required_blacklist = {'title', }
+    tags_required_blocklist = {'title', }
 class DTBNCXParser(AbstractHTMLParser):
    mimetypes = {'application/x-dtbncx+xml', }
-    tags_required_blacklist = {'title', 'doctitle', 'meta'}
+    tags_required_blocklist = {'title', 'doctitle', 'meta'}
 class _HTMLParser(parser.HTMLParser):
@@ -79,7 +79,7 @@ class _HTMLParser(parser.HTMLParser):
    Also, gotcha: the `tag` parameters are always in lowercase.
    """
-    def __init__(self, filename, blacklisted_tags, required_blacklisted_tags):
+    def __init__(self, filename, blocklisted_tags, required_blocklisted_tags):
        super().__init__()
        self.filename = filename
        self.__textrepr = ''
@@ -90,24 +90,24 @@ class _HTMLParser(parser.HTMLParser):
        self.__in_dangerous_but_required_tag = 0
        self.__in_dangerous_tag = 0
-        if required_blacklisted_tags & blacklisted_tags:  # pragma: nocover
+        if required_blocklisted_tags & blocklisted_tags:  # pragma: nocover
            raise ValueError("There is an overlap between %s and %s" % (
-                required_blacklisted_tags, blacklisted_tags))
+                required_blocklisted_tags, blocklisted_tags))
-        self.tag_required_blacklist = required_blacklisted_tags
+        self.tag_required_blocklist = required_blocklisted_tags
-        self.tag_blacklist = blacklisted_tags
+        self.tag_blocklist = blocklisted_tags
    def handle_starttag(self, tag: str, attrs: List[Tuple[str, str]]):
        original_tag = self.get_starttag_text()
        self.__validation_queue.append(original_tag)
-        if tag in self.tag_blacklist:
+        if tag in self.tag_blocklist:
            self.__in_dangerous_tag += 1
        if self.__in_dangerous_tag == 0:
            if self.__in_dangerous_but_required_tag == 0:
                self.__textrepr += original_tag
-        if tag in self.tag_required_blacklist:
+        if tag in self.tag_required_blocklist:
            self.__in_dangerous_but_required_tag += 1
    def handle_endtag(self, tag: str):
@@ -123,7 +123,7 @@ class _HTMLParser(parser.HTMLParser):
                             "tag %s in %s" %
                             (tag, previous_tag, self.filename))
-        if tag in self.tag_required_blacklist:
+        if tag in self.tag_required_blocklist:
            self.__in_dangerous_but_required_tag -= 1
        if self.__in_dangerous_tag == 0:
@@ -131,7 +131,7 @@ class _HTMLParser(parser.HTMLParser):
                # There is no `get_endtag_text()` method :/
                self.__textrepr += '</' + previous_tag + '>'
-        if tag in self.tag_blacklist:
+        if tag in self.tag_blocklist:
            self.__in_dangerous_tag -= 1
    def handle_data(self, data: str):
@@ -141,14 +141,14 @@ class _HTMLParser(parser.HTMLParser):
                    self.__textrepr += escape(data)
    def handle_startendtag(self, tag: str, attrs: List[Tuple[str, str]]):
-        if tag in self.tag_required_blacklist | self.tag_blacklist:
+        if tag in self.tag_required_blocklist | self.tag_blocklist:
            meta = {k:v for k, v in attrs}
            name = meta.get('name', 'harmful metadata')
            content = meta.get('content', 'harmful data')
            self.__meta[name] = content
            if self.__in_dangerous_tag == 0:
-                if tag in self.tag_required_blacklist:
+                if tag in self.tag_required_blocklist:
                    self.__textrepr += '<' + tag + ' />'
                return

diff --git a/libmat2/exiftool.py b/libmat2/exiftool.py index db92f60..d18b1fb 100644 --- a/libmat2/exiftool.py +++ b/libmat2/exiftool.py
@@ -15,14 +15,14 @@ class ExiftoolParser(abstract.AbstractParser):
15	from a import file, hence why several parsers are re-using its `get_meta`	15	from a import file, hence why several parsers are re-using its `get_meta`
16	method.	16	method.
17	"""	17	"""
18	meta_whitelist = set() # type: Set[str]	18	meta_allowlist = set() # type: Set[str]
19		19
20	def get_meta(self) -> Dict[str, Union[str, dict]]:	20	def get_meta(self) -> Dict[str, Union[str, dict]]:
21	out = subprocess.run([_get_exiftool_path(), '-json', self.filename],	21	out = subprocess.run([_get_exiftool_path(), '-json', self.filename],
22	input_filename=self.filename,	22	input_filename=self.filename,
23	check=True, stdout=subprocess.PIPE).stdout	23	check=True, stdout=subprocess.PIPE).stdout
24	meta = json.loads(out.decode('utf-8'))[0]	24	meta = json.loads(out.decode('utf-8'))[0]
25	for key in self.meta_whitelist:	25	for key in self.meta_allowlist:
26	meta.pop(key, None)	26	meta.pop(key, None)
27	return meta	27	return meta
28		28


diff --git a/libmat2/images.py b/libmat2/images.py index dd3be53..32a329f 100644 --- a/libmat2/images.py +++ b/libmat2/images.py
@@ -15,7 +15,7 @@ assert Set
15		15
16	class PNGParser(exiftool.ExiftoolParser):	16	class PNGParser(exiftool.ExiftoolParser):
17	mimetypes = {'image/png', }	17	mimetypes = {'image/png', }
18	meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName',	18	meta_allowlist = {'SourceFile', 'ExifToolVersion', 'FileName',
19	'Directory', 'FileSize', 'FileModifyDate',	19	'Directory', 'FileSize', 'FileModifyDate',
20	'FileAccessDate', 'FileInodeChangeDate',	20	'FileAccessDate', 'FileInodeChangeDate',
21	'FilePermissions', 'FileType', 'FileTypeExtension',	21	'FilePermissions', 'FileType', 'FileTypeExtension',
@@ -44,7 +44,7 @@ class PNGParser(exiftool.ExiftoolParser):
44		44
45	class GIFParser(exiftool.ExiftoolParser):	45	class GIFParser(exiftool.ExiftoolParser):
46	mimetypes = {'image/gif'}	46	mimetypes = {'image/gif'}
47	meta_whitelist = {'AnimationIterations', 'BackgroundColor', 'BitsPerPixel',	47	meta_allowlist = {'AnimationIterations', 'BackgroundColor', 'BitsPerPixel',
48	'ColorResolutionDepth', 'Directory', 'Duration',	48	'ColorResolutionDepth', 'Directory', 'Duration',
49	'ExifToolVersion', 'FileAccessDate',	49	'ExifToolVersion', 'FileAccessDate',
50	'FileInodeChangeDate', 'FileModifyDate', 'FileName',	50	'FileInodeChangeDate', 'FileModifyDate', 'FileName',
@@ -86,7 +86,7 @@ class GdkPixbufAbstractParser(exiftool.ExiftoolParser):
86	class JPGParser(GdkPixbufAbstractParser):	86	class JPGParser(GdkPixbufAbstractParser):
87	_type = 'jpeg'	87	_type = 'jpeg'
88	mimetypes = {'image/jpeg'}	88	mimetypes = {'image/jpeg'}
89	meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName',	89	meta_allowlist = {'SourceFile', 'ExifToolVersion', 'FileName',
90	'Directory', 'FileSize', 'FileModifyDate',	90	'Directory', 'FileSize', 'FileModifyDate',
91	'FileAccessDate', "FileInodeChangeDate",	91	'FileAccessDate', "FileInodeChangeDate",
92	'FilePermissions', 'FileType', 'FileTypeExtension',	92	'FilePermissions', 'FileType', 'FileTypeExtension',
@@ -99,7 +99,7 @@ class JPGParser(GdkPixbufAbstractParser):
99	class TiffParser(GdkPixbufAbstractParser):	99	class TiffParser(GdkPixbufAbstractParser):
100	_type = 'tiff'	100	_type = 'tiff'
101	mimetypes = {'image/tiff'}	101	mimetypes = {'image/tiff'}
102	meta_whitelist = {'Compression', 'ExifByteOrder', 'ExtraSamples',	102	meta_allowlist = {'Compression', 'ExifByteOrder', 'ExtraSamples',
103	'FillOrder', 'PhotometricInterpretation',	103	'FillOrder', 'PhotometricInterpretation',
104	'PlanarConfiguration', 'RowsPerStrip', 'SamplesPerPixel',	104	'PlanarConfiguration', 'RowsPerStrip', 'SamplesPerPixel',
105	'StripByteCounts', 'StripOffsets', 'BitsPerSample',	105	'StripByteCounts', 'StripOffsets', 'BitsPerSample',


diff --git a/libmat2/office.py b/libmat2/office.py index f3a5b22..2c9cbff 100644 --- a/libmat2/office.py +++ b/libmat2/office.py
@@ -89,7 +89,7 @@ class MSOfficeParser(ArchiveBasedAbstractParser):
89	r'^word/theme',	89	r'^word/theme',
90	r'^word/people\.xml$',	90	r'^word/people\.xml$',
91		91
92	# we have a whitelist in self.files_to_keep,	92	# we have an allowlist in self.files_to_keep,
93	# so we can trash everything else	93	# so we can trash everything else
94	r'^word/_rels/',	94	r'^word/_rels/',
95	}))	95	}))
@@ -100,7 +100,7 @@ class MSOfficeParser(ArchiveBasedAbstractParser):
100	def __fill_files_to_keep_via_content_types(self) -> bool:	100	def __fill_files_to_keep_via_content_types(self) -> bool:
101	""" There is a suer-handy `[Content_Types].xml` file	101	""" There is a suer-handy `[Content_Types].xml` file
102	in MS Office archives, describing what each other file contains.	102	in MS Office archives, describing what each other file contains.
103	The self.content_types_to_keep member contains a type whitelist,	103	The self.content_types_to_keep member contains a type allowlist,
104	so we're using it to fill the self.files_to_keep one.	104	so we're using it to fill the self.files_to_keep one.
105	"""	105	"""
106	with zipfile.ZipFile(self.filename) as zin:	106	with zipfile.ZipFile(self.filename) as zin:
@@ -220,7 +220,7 @@ class MSOfficeParser(ArchiveBasedAbstractParser):
220	for file_to_omit in self.files_to_omit:	220	for file_to_omit in self.files_to_omit:
221	if file_to_omit.search(fname):	221	if file_to_omit.search(fname):
222	matches = map(lambda r: r.search(fname), self.files_to_keep)	222	matches = map(lambda r: r.search(fname), self.files_to_keep)
223	if any(matches): # the file is whitelisted	223	if any(matches): # the file is in the allowlist
224	continue	224	continue
225	removed_fnames.add(fname)	225	removed_fnames.add(fname)
226	break	226	break


diff --git a/libmat2/torrent.py b/libmat2/torrent.py index c006f9c..6021d75 100644 --- a/libmat2/torrent.py +++ b/libmat2/torrent.py
@@ -6,7 +6,7 @@ from . import abstract
6		6
7	class TorrentParser(abstract.AbstractParser):	7	class TorrentParser(abstract.AbstractParser):
8	mimetypes = {'application/x-bittorrent', }	8	mimetypes = {'application/x-bittorrent', }
9	whitelist = {b'announce', b'announce-list', b'info'}	9	allowlist = {b'announce', b'announce-list', b'info'}
10		10
11	def __init__(self, filename):	11	def __init__(self, filename):
12	super().__init__(filename)	12	super().__init__(filename)
@@ -18,14 +18,14 @@ class TorrentParser(abstract.AbstractParser):
18	def get_meta(self) -> Dict[str, Union[str, dict]]:	18	def get_meta(self) -> Dict[str, Union[str, dict]]:
19	metadata = {}	19	metadata = {}
20	for key, value in self.dict_repr.items():	20	for key, value in self.dict_repr.items():
21	if key not in self.whitelist:	21	if key not in self.allowlist:
22	metadata[key.decode('utf-8')] = value	22	metadata[key.decode('utf-8')] = value
23	return metadata	23	return metadata
24		24
25	def remove_all(self) -> bool:	25	def remove_all(self) -> bool:
26	cleaned = dict()	26	cleaned = dict()
27	for key, value in self.dict_repr.items():	27	for key, value in self.dict_repr.items():
28	if key in self.whitelist:	28	if key in self.allowlist:
29	cleaned[key] = value	29	cleaned[key] = value
30	with open(self.output_filename, 'wb') as f:	30	with open(self.output_filename, 'wb') as f:
31	f.write(_BencodeHandler().bencode(cleaned))	31	f.write(_BencodeHandler().bencode(cleaned))


diff --git a/libmat2/video.py b/libmat2/video.py index 4f15b19..0060f78 100644 --- a/libmat2/video.py +++ b/libmat2/video.py
@@ -10,10 +10,10 @@ from . import subprocess
10	class AbstractFFmpegParser(exiftool.ExiftoolParser):	10	class AbstractFFmpegParser(exiftool.ExiftoolParser):
11	""" Abstract parser for all FFmpeg-based ones, mainly for video. """	11	""" Abstract parser for all FFmpeg-based ones, mainly for video. """
12	# Some fileformats have mandatory metadata fields	12	# Some fileformats have mandatory metadata fields
13	meta_key_value_whitelist = {} # type: Dict[str, Union[str, int]]	13	meta_key_value_allowlist = {} # type: Dict[str, Union[str, int]]
14		14
15	def remove_all(self) -> bool:	15	def remove_all(self) -> bool:
16	if self.meta_key_value_whitelist:	16	if self.meta_key_value_allowlist:
17	logging.warning('The format of "%s" (%s) has some mandatory '	17	logging.warning('The format of "%s" (%s) has some mandatory '
18	'metadata fields; mat2 filled them with standard '	18	'metadata fields; mat2 filled them with standard '
19	'data.', self.filename, ', '.join(self.mimetypes))	19	'data.', self.filename, ', '.join(self.mimetypes))
@@ -45,8 +45,8 @@ class AbstractFFmpegParser(exiftool.ExiftoolParser):
45		45
46	ret = dict() # type: Dict[str, Union[str, dict]]	46	ret = dict() # type: Dict[str, Union[str, dict]]
47	for key, value in meta.items():	47	for key, value in meta.items():
48	if key in self.meta_key_value_whitelist.keys():	48	if key in self.meta_key_value_allowlist.keys():
49	if value == self.meta_key_value_whitelist[key]:	49	if value == self.meta_key_value_allowlist[key]:
50	continue	50	continue
51	ret[key] = value	51	ret[key] = value
52	return ret	52	return ret
@@ -54,7 +54,7 @@ class AbstractFFmpegParser(exiftool.ExiftoolParser):
54		54
55	class WMVParser(AbstractFFmpegParser):	55	class WMVParser(AbstractFFmpegParser):
56	mimetypes = {'video/x-ms-wmv', }	56	mimetypes = {'video/x-ms-wmv', }
57	meta_whitelist = {'AudioChannels', 'AudioCodecID', 'AudioCodecName',	57	meta_allowlist = {'AudioChannels', 'AudioCodecID', 'AudioCodecName',
58	'ErrorCorrectionType', 'AudioSampleRate', 'DataPackets',	58	'ErrorCorrectionType', 'AudioSampleRate', 'DataPackets',
59	'Directory', 'Duration', 'ExifToolVersion',	59	'Directory', 'Duration', 'ExifToolVersion',
60	'FileAccessDate', 'FileInodeChangeDate', 'FileLength',	60	'FileAccessDate', 'FileInodeChangeDate', 'FileLength',
@@ -64,7 +64,7 @@ class WMVParser(AbstractFFmpegParser):
64	'ImageWidth', 'MIMEType', 'MaxBitrate', 'MaxPacketSize',	64	'ImageWidth', 'MIMEType', 'MaxBitrate', 'MaxPacketSize',
65	'Megapixels', 'MinPacketSize', 'Preroll', 'SendDuration',	65	'Megapixels', 'MinPacketSize', 'Preroll', 'SendDuration',
66	'SourceFile', 'StreamNumber', 'VideoCodecName', }	66	'SourceFile', 'StreamNumber', 'VideoCodecName', }
67	meta_key_value_whitelist = { # some metadata are mandatory :/	67	meta_key_value_allowlist = { # some metadata are mandatory :/
68	'AudioCodecDescription': '',	68	'AudioCodecDescription': '',
69	'CreationDate': '0000:00:00 00:00:00Z',	69	'CreationDate': '0000:00:00 00:00:00Z',
70	'FileID': '00000000-0000-0000-0000-000000000000',	70	'FileID': '00000000-0000-0000-0000-000000000000',
@@ -78,7 +78,7 @@ class WMVParser(AbstractFFmpegParser):
78		78
79	class AVIParser(AbstractFFmpegParser):	79	class AVIParser(AbstractFFmpegParser):
80	mimetypes = {'video/x-msvideo', }	80	mimetypes = {'video/x-msvideo', }
81	meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName', 'Directory',	81	meta_allowlist = {'SourceFile', 'ExifToolVersion', 'FileName', 'Directory',
82	'FileSize', 'FileModifyDate', 'FileAccessDate',	82	'FileSize', 'FileModifyDate', 'FileAccessDate',
83	'FileInodeChangeDate', 'FilePermissions', 'FileType',	83	'FileInodeChangeDate', 'FilePermissions', 'FileType',
84	'FileTypeExtension', 'MIMEType', 'FrameRate', 'MaxDataRate',	84	'FileTypeExtension', 'MIMEType', 'FrameRate', 'MaxDataRate',
@@ -98,7 +98,7 @@ class AVIParser(AbstractFFmpegParser):
98		98
99	class MP4Parser(AbstractFFmpegParser):	99	class MP4Parser(AbstractFFmpegParser):
100	mimetypes = {'video/mp4', }	100	mimetypes = {'video/mp4', }
101	meta_whitelist = {'AudioFormat', 'AvgBitrate', 'Balance', 'TrackDuration',	101	meta_allowlist = {'AudioFormat', 'AvgBitrate', 'Balance', 'TrackDuration',
102	'XResolution', 'YResolution', 'ExifToolVersion',	102	'XResolution', 'YResolution', 'ExifToolVersion',
103	'FileAccessDate', 'FileInodeChangeDate', 'FileModifyDate',	103	'FileAccessDate', 'FileInodeChangeDate', 'FileModifyDate',
104	'FileName', 'FilePermissions', 'MIMEType', 'FileType',	104	'FileName', 'FilePermissions', 'MIMEType', 'FileType',
@@ -109,7 +109,7 @@ class MP4Parser(AbstractFFmpegParser):
109	'MovieDataSize', 'VideoFrameRate', 'MediaTimeScale',	109	'MovieDataSize', 'VideoFrameRate', 'MediaTimeScale',
110	'SourceImageHeight', 'SourceImageWidth',	110	'SourceImageHeight', 'SourceImageWidth',
111	'MatrixStructure', 'MediaDuration'}	111	'MatrixStructure', 'MediaDuration'}
112	meta_key_value_whitelist = { # some metadata are mandatory :/	112	meta_key_value_allowlist = { # some metadata are mandatory :/
113	'CreateDate': '0000:00:00 00:00:00',	113	'CreateDate': '0000:00:00 00:00:00',
114	'CurrentTime': '0 s',	114	'CurrentTime': '0 s',
115	'MediaCreateDate': '0000:00:00 00:00:00',	115	'MediaCreateDate': '0000:00:00 00:00:00',


diff --git a/libmat2/web.py b/libmat2/web.py index 34426b8..0a61908 100644 --- a/libmat2/web.py +++ b/libmat2/web.py
@@ -37,15 +37,15 @@ class CSSParser(abstract.AbstractParser):
37		37
38		38
39	class AbstractHTMLParser(abstract.AbstractParser):	39	class AbstractHTMLParser(abstract.AbstractParser):
40	tags_blacklist = set() # type: Set[str]	40	tags_blocklist = set() # type: Set[str]
41	# In some html/xml-based formats some tags are mandatory,	41	# In some html/xml-based formats some tags are mandatory,
42	# so we're keeping them, but are discarding their content	42	# so we're keeping them, but are discarding their content
43	tags_required_blacklist = set() # type: Set[str]	43	tags_required_blocklist = set() # type: Set[str]
44		44
45	def __init__(self, filename):	45	def __init__(self, filename):
46	super().__init__(filename)	46	super().__init__(filename)
47	self.__parser = _HTMLParser(self.filename, self.tags_blacklist,	47	self.__parser = _HTMLParser(self.filename, self.tags_blocklist,
48	self.tags_required_blacklist)	48	self.tags_required_blocklist)
49	with open(filename, encoding='utf-8') as f:	49	with open(filename, encoding='utf-8') as f:
50	self.__parser.feed(f.read())	50	self.__parser.feed(f.read())
51	self.__parser.close()	51	self.__parser.close()
@@ -59,13 +59,13 @@ class AbstractHTMLParser(abstract.AbstractParser):
59		59
60	class HTMLParser(AbstractHTMLParser):	60	class HTMLParser(AbstractHTMLParser):
61	mimetypes = {'text/html', }	61	mimetypes = {'text/html', }
62	tags_blacklist = {'meta', }	62	tags_blocklist = {'meta', }
63	tags_required_blacklist = {'title', }	63	tags_required_blocklist = {'title', }
64		64
65		65
66	class DTBNCXParser(AbstractHTMLParser):	66	class DTBNCXParser(AbstractHTMLParser):
67	mimetypes = {'application/x-dtbncx+xml', }	67	mimetypes = {'application/x-dtbncx+xml', }
68	tags_required_blacklist = {'title', 'doctitle', 'meta'}	68	tags_required_blocklist = {'title', 'doctitle', 'meta'}
69		69
70		70
71	class _HTMLParser(parser.HTMLParser):	71	class _HTMLParser(parser.HTMLParser):
@@ -79,7 +79,7 @@ class _HTMLParser(parser.HTMLParser):
79		79
80	Also, gotcha: the `tag` parameters are always in lowercase.	80	Also, gotcha: the `tag` parameters are always in lowercase.
81	"""	81	"""
82	def __init__(self, filename, blacklisted_tags, required_blacklisted_tags):	82	def __init__(self, filename, blocklisted_tags, required_blocklisted_tags):
83	super().__init__()	83	super().__init__()
84	self.filename = filename	84	self.filename = filename
85	self.__textrepr = ''	85	self.__textrepr = ''
@@ -90,24 +90,24 @@ class _HTMLParser(parser.HTMLParser):
90	self.__in_dangerous_but_required_tag = 0	90	self.__in_dangerous_but_required_tag = 0
91	self.__in_dangerous_tag = 0	91	self.__in_dangerous_tag = 0
92		92
93	if required_blacklisted_tags & blacklisted_tags: # pragma: nocover	93	if required_blocklisted_tags & blocklisted_tags: # pragma: nocover
94	raise ValueError("There is an overlap between %s and %s" % (	94	raise ValueError("There is an overlap between %s and %s" % (
95	required_blacklisted_tags, blacklisted_tags))	95	required_blocklisted_tags, blocklisted_tags))
96	self.tag_required_blacklist = required_blacklisted_tags	96	self.tag_required_blocklist = required_blocklisted_tags
97	self.tag_blacklist = blacklisted_tags	97	self.tag_blocklist = blocklisted_tags
98		98
99	def handle_starttag(self, tag: str, attrs: List[Tuple[str, str]]):	99	def handle_starttag(self, tag: str, attrs: List[Tuple[str, str]]):
100	original_tag = self.get_starttag_text()	100	original_tag = self.get_starttag_text()
101	self.__validation_queue.append(original_tag)	101	self.__validation_queue.append(original_tag)
102		102
103	if tag in self.tag_blacklist:	103	if tag in self.tag_blocklist:
104	self.__in_dangerous_tag += 1	104	self.__in_dangerous_tag += 1
105		105
106	if self.__in_dangerous_tag == 0:	106	if self.__in_dangerous_tag == 0:
107	if self.__in_dangerous_but_required_tag == 0:	107	if self.__in_dangerous_but_required_tag == 0:
108	self.__textrepr += original_tag	108	self.__textrepr += original_tag
109		109
110	if tag in self.tag_required_blacklist:	110	if tag in self.tag_required_blocklist:
111	self.__in_dangerous_but_required_tag += 1	111	self.__in_dangerous_but_required_tag += 1
112		112
113	def handle_endtag(self, tag: str):	113	def handle_endtag(self, tag: str):
@@ -123,7 +123,7 @@ class _HTMLParser(parser.HTMLParser):
123	"tag %s in %s" %	123	"tag %s in %s" %
124	(tag, previous_tag, self.filename))	124	(tag, previous_tag, self.filename))
125		125
126	if tag in self.tag_required_blacklist:	126	if tag in self.tag_required_blocklist:
127	self.__in_dangerous_but_required_tag -= 1	127	self.__in_dangerous_but_required_tag -= 1
128		128
129	if self.__in_dangerous_tag == 0:	129	if self.__in_dangerous_tag == 0:
@@ -131,7 +131,7 @@ class _HTMLParser(parser.HTMLParser):
131	# There is no `get_endtag_text()` method :/	131	# There is no `get_endtag_text()` method :/
132	self.__textrepr += '</' + previous_tag + '>'	132	self.__textrepr += '</' + previous_tag + '>'
133		133
134	if tag in self.tag_blacklist:	134	if tag in self.tag_blocklist:
135	self.__in_dangerous_tag -= 1	135	self.__in_dangerous_tag -= 1
136		136
137	def handle_data(self, data: str):	137	def handle_data(self, data: str):
@@ -141,14 +141,14 @@ class _HTMLParser(parser.HTMLParser):
141	self.__textrepr += escape(data)	141	self.__textrepr += escape(data)
142		142
143	def handle_startendtag(self, tag: str, attrs: List[Tuple[str, str]]):	143	def handle_startendtag(self, tag: str, attrs: List[Tuple[str, str]]):
144	if tag in self.tag_required_blacklist | self.tag_blacklist:	144	if tag in self.tag_required_blocklist | self.tag_blocklist:
145	meta = {k:v for k, v in attrs}	145	meta = {k:v for k, v in attrs}
146	name = meta.get('name', 'harmful metadata')	146	name = meta.get('name', 'harmful metadata')
147	content = meta.get('content', 'harmful data')	147	content = meta.get('content', 'harmful data')
148	self.__meta[name] = content	148	self.__meta[name] = content
149		149
150	if self.__in_dangerous_tag == 0:	150	if self.__in_dangerous_tag == 0:
151	if tag in self.tag_required_blacklist:	151	if tag in self.tag_required_blocklist:
152	self.__textrepr += '<' + tag + ' />'	152	self.__textrepr += '<' + tag + ' />'
153	return	153	return
154		154