summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- libmat2/exiftool.py 4
-rw-r--r-- libmat2/images.py 8
-rw-r--r-- libmat2/office.py 6
-rw-r--r-- libmat2/torrent.py 6
-rw-r--r-- libmat2/video.py 18
-rw-r--r-- libmat2/web.py 36
6 files changed, 39 insertions, 39 deletions
diff --git a/libmat2/exiftool.py b/libmat2/exiftool.py
index db92f60..d18b1fb 100644
--- a/libmat2/exiftool.py
+++ b/libmat2/exiftool.py
@@ -15,14 +15,14 @@ class ExiftoolParser(abstract.AbstractParser):
15 from an import file, hence why several parsers are re-using its `get_meta` 15 from an import file, hence why several parsers are re-using its `get_meta`
16 method. 16 method.
17 """ 17 """
18 meta_whitelist = set() # type: Set[str] 18 meta_allowlist = set() # type: Set[str]
19 19
20 def get_meta(self) -> Dict[str, Union[str, dict]]: 20 def get_meta(self) -> Dict[str, Union[str, dict]]:
21 out = subprocess.run([_get_exiftool_path(), '-json', self.filename], 21 out = subprocess.run([_get_exiftool_path(), '-json', self.filename],
22 input_filename=self.filename, 22 input_filename=self.filename,
23 check=True, stdout=subprocess.PIPE).stdout 23 check=True, stdout=subprocess.PIPE).stdout
24 meta = json.loads(out.decode('utf-8'))[0] 24 meta = json.loads(out.decode('utf-8'))[0]
25 for key in self.meta_whitelist: 25 for key in self.meta_allowlist:
26 meta.pop(key, None) 26 meta.pop(key, None)
27 return meta 27 return meta
28 28
diff --git a/libmat2/images.py b/libmat2/images.py
index dd3be53..32a329f 100644
--- a/libmat2/images.py
+++ b/libmat2/images.py
@@ -15,7 +15,7 @@ assert Set
15 15
16class PNGParser(exiftool.ExiftoolParser): 16class PNGParser(exiftool.ExiftoolParser):
17 mimetypes = {'image/png', } 17 mimetypes = {'image/png', }
18 meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName', 18 meta_allowlist = {'SourceFile', 'ExifToolVersion', 'FileName',
19 'Directory', 'FileSize', 'FileModifyDate', 19 'Directory', 'FileSize', 'FileModifyDate',
20 'FileAccessDate', 'FileInodeChangeDate', 20 'FileAccessDate', 'FileInodeChangeDate',
21 'FilePermissions', 'FileType', 'FileTypeExtension', 21 'FilePermissions', 'FileType', 'FileTypeExtension',
@@ -44,7 +44,7 @@ class PNGParser(exiftool.ExiftoolParser):
44 44
45class GIFParser(exiftool.ExiftoolParser): 45class GIFParser(exiftool.ExiftoolParser):
46 mimetypes = {'image/gif'} 46 mimetypes = {'image/gif'}
47 meta_whitelist = {'AnimationIterations', 'BackgroundColor', 'BitsPerPixel', 47 meta_allowlist = {'AnimationIterations', 'BackgroundColor', 'BitsPerPixel',
48 'ColorResolutionDepth', 'Directory', 'Duration', 48 'ColorResolutionDepth', 'Directory', 'Duration',
49 'ExifToolVersion', 'FileAccessDate', 49 'ExifToolVersion', 'FileAccessDate',
50 'FileInodeChangeDate', 'FileModifyDate', 'FileName', 50 'FileInodeChangeDate', 'FileModifyDate', 'FileName',
@@ -86,7 +86,7 @@ class GdkPixbufAbstractParser(exiftool.ExiftoolParser):
86class JPGParser(GdkPixbufAbstractParser): 86class JPGParser(GdkPixbufAbstractParser):
87 _type = 'jpeg' 87 _type = 'jpeg'
88 mimetypes = {'image/jpeg'} 88 mimetypes = {'image/jpeg'}
89 meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName', 89 meta_allowlist = {'SourceFile', 'ExifToolVersion', 'FileName',
90 'Directory', 'FileSize', 'FileModifyDate', 90 'Directory', 'FileSize', 'FileModifyDate',
91 'FileAccessDate', "FileInodeChangeDate", 91 'FileAccessDate', "FileInodeChangeDate",
92 'FilePermissions', 'FileType', 'FileTypeExtension', 92 'FilePermissions', 'FileType', 'FileTypeExtension',
@@ -99,7 +99,7 @@ class JPGParser(GdkPixbufAbstractParser):
99class TiffParser(GdkPixbufAbstractParser): 99class TiffParser(GdkPixbufAbstractParser):
100 _type = 'tiff' 100 _type = 'tiff'
101 mimetypes = {'image/tiff'} 101 mimetypes = {'image/tiff'}
102 meta_whitelist = {'Compression', 'ExifByteOrder', 'ExtraSamples', 102 meta_allowlist = {'Compression', 'ExifByteOrder', 'ExtraSamples',
103 'FillOrder', 'PhotometricInterpretation', 103 'FillOrder', 'PhotometricInterpretation',
104 'PlanarConfiguration', 'RowsPerStrip', 'SamplesPerPixel', 104 'PlanarConfiguration', 'RowsPerStrip', 'SamplesPerPixel',
105 'StripByteCounts', 'StripOffsets', 'BitsPerSample', 105 'StripByteCounts', 'StripOffsets', 'BitsPerSample',
diff --git a/libmat2/office.py b/libmat2/office.py
index f3a5b22..2c9cbff 100644
--- a/libmat2/office.py
+++ b/libmat2/office.py
@@ -89,7 +89,7 @@ class MSOfficeParser(ArchiveBasedAbstractParser):
89 r'^word/theme', 89 r'^word/theme',
90 r'^word/people\.xml$', 90 r'^word/people\.xml$',
91 91
92 # we have a whitelist in self.files_to_keep, 92 # we have an allowlist in self.files_to_keep,
93 # so we can trash everything else 93 # so we can trash everything else
94 r'^word/_rels/', 94 r'^word/_rels/',
95 })) 95 }))
@@ -100,7 +100,7 @@ class MSOfficeParser(ArchiveBasedAbstractParser):
100 def __fill_files_to_keep_via_content_types(self) -> bool: 100 def __fill_files_to_keep_via_content_types(self) -> bool:
101 """ There is a super-handy `[Content_Types].xml` file 101 """ There is a super-handy `[Content_Types].xml` file
102 in MS Office archives, describing what each other file contains. 102 in MS Office archives, describing what each other file contains.
103 The self.content_types_to_keep member contains a type whitelist, 103 The self.content_types_to_keep member contains a type allowlist,
104 so we're using it to fill the self.files_to_keep one. 104 so we're using it to fill the self.files_to_keep one.
105 """ 105 """
106 with zipfile.ZipFile(self.filename) as zin: 106 with zipfile.ZipFile(self.filename) as zin:
@@ -220,7 +220,7 @@ class MSOfficeParser(ArchiveBasedAbstractParser):
220 for file_to_omit in self.files_to_omit: 220 for file_to_omit in self.files_to_omit:
221 if file_to_omit.search(fname): 221 if file_to_omit.search(fname):
222 matches = map(lambda r: r.search(fname), self.files_to_keep) 222 matches = map(lambda r: r.search(fname), self.files_to_keep)
223 if any(matches): # the file is whitelisted 223 if any(matches): # the file is in the allowlist
224 continue 224 continue
225 removed_fnames.add(fname) 225 removed_fnames.add(fname)
226 break 226 break
diff --git a/libmat2/torrent.py b/libmat2/torrent.py
index c006f9c..6021d75 100644
--- a/libmat2/torrent.py
+++ b/libmat2/torrent.py
@@ -6,7 +6,7 @@ from . import abstract
6 6
7class TorrentParser(abstract.AbstractParser): 7class TorrentParser(abstract.AbstractParser):
8 mimetypes = {'application/x-bittorrent', } 8 mimetypes = {'application/x-bittorrent', }
9 whitelist = {b'announce', b'announce-list', b'info'} 9 allowlist = {b'announce', b'announce-list', b'info'}
10 10
11 def __init__(self, filename): 11 def __init__(self, filename):
12 super().__init__(filename) 12 super().__init__(filename)
@@ -18,14 +18,14 @@ class TorrentParser(abstract.AbstractParser):
18 def get_meta(self) -> Dict[str, Union[str, dict]]: 18 def get_meta(self) -> Dict[str, Union[str, dict]]:
19 metadata = {} 19 metadata = {}
20 for key, value in self.dict_repr.items(): 20 for key, value in self.dict_repr.items():
21 if key not in self.whitelist: 21 if key not in self.allowlist:
22 metadata[key.decode('utf-8')] = value 22 metadata[key.decode('utf-8')] = value
23 return metadata 23 return metadata
24 24
25 def remove_all(self) -> bool: 25 def remove_all(self) -> bool:
26 cleaned = dict() 26 cleaned = dict()
27 for key, value in self.dict_repr.items(): 27 for key, value in self.dict_repr.items():
28 if key in self.whitelist: 28 if key in self.allowlist:
29 cleaned[key] = value 29 cleaned[key] = value
30 with open(self.output_filename, 'wb') as f: 30 with open(self.output_filename, 'wb') as f:
31 f.write(_BencodeHandler().bencode(cleaned)) 31 f.write(_BencodeHandler().bencode(cleaned))
diff --git a/libmat2/video.py b/libmat2/video.py
index 4f15b19..0060f78 100644
--- a/libmat2/video.py
+++ b/libmat2/video.py
@@ -10,10 +10,10 @@ from . import subprocess
10class AbstractFFmpegParser(exiftool.ExiftoolParser): 10class AbstractFFmpegParser(exiftool.ExiftoolParser):
11 """ Abstract parser for all FFmpeg-based ones, mainly for video. """ 11 """ Abstract parser for all FFmpeg-based ones, mainly for video. """
12 # Some fileformats have mandatory metadata fields 12 # Some fileformats have mandatory metadata fields
13 meta_key_value_whitelist = {} # type: Dict[str, Union[str, int]] 13 meta_key_value_allowlist = {} # type: Dict[str, Union[str, int]]
14 14
15 def remove_all(self) -> bool: 15 def remove_all(self) -> bool:
16 if self.meta_key_value_whitelist: 16 if self.meta_key_value_allowlist:
17 logging.warning('The format of "%s" (%s) has some mandatory ' 17 logging.warning('The format of "%s" (%s) has some mandatory '
18 'metadata fields; mat2 filled them with standard ' 18 'metadata fields; mat2 filled them with standard '
19 'data.', self.filename, ', '.join(self.mimetypes)) 19 'data.', self.filename, ', '.join(self.mimetypes))
@@ -45,8 +45,8 @@ class AbstractFFmpegParser(exiftool.ExiftoolParser):
45 45
46 ret = dict() # type: Dict[str, Union[str, dict]] 46 ret = dict() # type: Dict[str, Union[str, dict]]
47 for key, value in meta.items(): 47 for key, value in meta.items():
48 if key in self.meta_key_value_whitelist.keys(): 48 if key in self.meta_key_value_allowlist.keys():
49 if value == self.meta_key_value_whitelist[key]: 49 if value == self.meta_key_value_allowlist[key]:
50 continue 50 continue
51 ret[key] = value 51 ret[key] = value
52 return ret 52 return ret
@@ -54,7 +54,7 @@ class AbstractFFmpegParser(exiftool.ExiftoolParser):
54 54
55class WMVParser(AbstractFFmpegParser): 55class WMVParser(AbstractFFmpegParser):
56 mimetypes = {'video/x-ms-wmv', } 56 mimetypes = {'video/x-ms-wmv', }
57 meta_whitelist = {'AudioChannels', 'AudioCodecID', 'AudioCodecName', 57 meta_allowlist = {'AudioChannels', 'AudioCodecID', 'AudioCodecName',
58 'ErrorCorrectionType', 'AudioSampleRate', 'DataPackets', 58 'ErrorCorrectionType', 'AudioSampleRate', 'DataPackets',
59 'Directory', 'Duration', 'ExifToolVersion', 59 'Directory', 'Duration', 'ExifToolVersion',
60 'FileAccessDate', 'FileInodeChangeDate', 'FileLength', 60 'FileAccessDate', 'FileInodeChangeDate', 'FileLength',
@@ -64,7 +64,7 @@ class WMVParser(AbstractFFmpegParser):
64 'ImageWidth', 'MIMEType', 'MaxBitrate', 'MaxPacketSize', 64 'ImageWidth', 'MIMEType', 'MaxBitrate', 'MaxPacketSize',
65 'Megapixels', 'MinPacketSize', 'Preroll', 'SendDuration', 65 'Megapixels', 'MinPacketSize', 'Preroll', 'SendDuration',
66 'SourceFile', 'StreamNumber', 'VideoCodecName', } 66 'SourceFile', 'StreamNumber', 'VideoCodecName', }
67 meta_key_value_whitelist = { # some metadata are mandatory :/ 67 meta_key_value_allowlist = { # some metadata are mandatory :/
68 'AudioCodecDescription': '', 68 'AudioCodecDescription': '',
69 'CreationDate': '0000:00:00 00:00:00Z', 69 'CreationDate': '0000:00:00 00:00:00Z',
70 'FileID': '00000000-0000-0000-0000-000000000000', 70 'FileID': '00000000-0000-0000-0000-000000000000',
@@ -78,7 +78,7 @@ class WMVParser(AbstractFFmpegParser):
78 78
79class AVIParser(AbstractFFmpegParser): 79class AVIParser(AbstractFFmpegParser):
80 mimetypes = {'video/x-msvideo', } 80 mimetypes = {'video/x-msvideo', }
81 meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName', 'Directory', 81 meta_allowlist = {'SourceFile', 'ExifToolVersion', 'FileName', 'Directory',
82 'FileSize', 'FileModifyDate', 'FileAccessDate', 82 'FileSize', 'FileModifyDate', 'FileAccessDate',
83 'FileInodeChangeDate', 'FilePermissions', 'FileType', 83 'FileInodeChangeDate', 'FilePermissions', 'FileType',
84 'FileTypeExtension', 'MIMEType', 'FrameRate', 'MaxDataRate', 84 'FileTypeExtension', 'MIMEType', 'FrameRate', 'MaxDataRate',
@@ -98,7 +98,7 @@ class AVIParser(AbstractFFmpegParser):
98 98
99class MP4Parser(AbstractFFmpegParser): 99class MP4Parser(AbstractFFmpegParser):
100 mimetypes = {'video/mp4', } 100 mimetypes = {'video/mp4', }
101 meta_whitelist = {'AudioFormat', 'AvgBitrate', 'Balance', 'TrackDuration', 101 meta_allowlist = {'AudioFormat', 'AvgBitrate', 'Balance', 'TrackDuration',
102 'XResolution', 'YResolution', 'ExifToolVersion', 102 'XResolution', 'YResolution', 'ExifToolVersion',
103 'FileAccessDate', 'FileInodeChangeDate', 'FileModifyDate', 103 'FileAccessDate', 'FileInodeChangeDate', 'FileModifyDate',
104 'FileName', 'FilePermissions', 'MIMEType', 'FileType', 104 'FileName', 'FilePermissions', 'MIMEType', 'FileType',
@@ -109,7 +109,7 @@ class MP4Parser(AbstractFFmpegParser):
109 'MovieDataSize', 'VideoFrameRate', 'MediaTimeScale', 109 'MovieDataSize', 'VideoFrameRate', 'MediaTimeScale',
110 'SourceImageHeight', 'SourceImageWidth', 110 'SourceImageHeight', 'SourceImageWidth',
111 'MatrixStructure', 'MediaDuration'} 111 'MatrixStructure', 'MediaDuration'}
112 meta_key_value_whitelist = { # some metadata are mandatory :/ 112 meta_key_value_allowlist = { # some metadata are mandatory :/
113 'CreateDate': '0000:00:00 00:00:00', 113 'CreateDate': '0000:00:00 00:00:00',
114 'CurrentTime': '0 s', 114 'CurrentTime': '0 s',
115 'MediaCreateDate': '0000:00:00 00:00:00', 115 'MediaCreateDate': '0000:00:00 00:00:00',
diff --git a/libmat2/web.py b/libmat2/web.py
index 34426b8..0a61908 100644
--- a/libmat2/web.py
+++ b/libmat2/web.py
@@ -37,15 +37,15 @@ class CSSParser(abstract.AbstractParser):
37 37
38 38
39class AbstractHTMLParser(abstract.AbstractParser): 39class AbstractHTMLParser(abstract.AbstractParser):
40 tags_blacklist = set() # type: Set[str] 40 tags_blocklist = set() # type: Set[str]
41 # In some html/xml-based formats some tags are mandatory, 41 # In some html/xml-based formats some tags are mandatory,
42 # so we're keeping them, but are discarding their content 42 # so we're keeping them, but are discarding their content
43 tags_required_blacklist = set() # type: Set[str] 43 tags_required_blocklist = set() # type: Set[str]
44 44
45 def __init__(self, filename): 45 def __init__(self, filename):
46 super().__init__(filename) 46 super().__init__(filename)
47 self.__parser = _HTMLParser(self.filename, self.tags_blacklist, 47 self.__parser = _HTMLParser(self.filename, self.tags_blocklist,
48 self.tags_required_blacklist) 48 self.tags_required_blocklist)
49 with open(filename, encoding='utf-8') as f: 49 with open(filename, encoding='utf-8') as f:
50 self.__parser.feed(f.read()) 50 self.__parser.feed(f.read())
51 self.__parser.close() 51 self.__parser.close()
@@ -59,13 +59,13 @@ class AbstractHTMLParser(abstract.AbstractParser):
59 59
60class HTMLParser(AbstractHTMLParser): 60class HTMLParser(AbstractHTMLParser):
61 mimetypes = {'text/html', } 61 mimetypes = {'text/html', }
62 tags_blacklist = {'meta', } 62 tags_blocklist = {'meta', }
63 tags_required_blacklist = {'title', } 63 tags_required_blocklist = {'title', }
64 64
65 65
66class DTBNCXParser(AbstractHTMLParser): 66class DTBNCXParser(AbstractHTMLParser):
67 mimetypes = {'application/x-dtbncx+xml', } 67 mimetypes = {'application/x-dtbncx+xml', }
68 tags_required_blacklist = {'title', 'doctitle', 'meta'} 68 tags_required_blocklist = {'title', 'doctitle', 'meta'}
69 69
70 70
71class _HTMLParser(parser.HTMLParser): 71class _HTMLParser(parser.HTMLParser):
@@ -79,7 +79,7 @@ class _HTMLParser(parser.HTMLParser):
79 79
80 Also, gotcha: the `tag` parameters are always in lowercase. 80 Also, gotcha: the `tag` parameters are always in lowercase.
81 """ 81 """
82 def __init__(self, filename, blacklisted_tags, required_blacklisted_tags): 82 def __init__(self, filename, blocklisted_tags, required_blocklisted_tags):
83 super().__init__() 83 super().__init__()
84 self.filename = filename 84 self.filename = filename
85 self.__textrepr = '' 85 self.__textrepr = ''
@@ -90,24 +90,24 @@ class _HTMLParser(parser.HTMLParser):
90 self.__in_dangerous_but_required_tag = 0 90 self.__in_dangerous_but_required_tag = 0
91 self.__in_dangerous_tag = 0 91 self.__in_dangerous_tag = 0
92 92
93 if required_blacklisted_tags & blacklisted_tags: # pragma: nocover 93 if required_blocklisted_tags & blocklisted_tags: # pragma: nocover
94 raise ValueError("There is an overlap between %s and %s" % ( 94 raise ValueError("There is an overlap between %s and %s" % (
95 required_blacklisted_tags, blacklisted_tags)) 95 required_blocklisted_tags, blocklisted_tags))
96 self.tag_required_blacklist = required_blacklisted_tags 96 self.tag_required_blocklist = required_blocklisted_tags
97 self.tag_blacklist = blacklisted_tags 97 self.tag_blocklist = blocklisted_tags
98 98
99 def handle_starttag(self, tag: str, attrs: List[Tuple[str, str]]): 99 def handle_starttag(self, tag: str, attrs: List[Tuple[str, str]]):
100 original_tag = self.get_starttag_text() 100 original_tag = self.get_starttag_text()
101 self.__validation_queue.append(original_tag) 101 self.__validation_queue.append(original_tag)
102 102
103 if tag in self.tag_blacklist: 103 if tag in self.tag_blocklist:
104 self.__in_dangerous_tag += 1 104 self.__in_dangerous_tag += 1
105 105
106 if self.__in_dangerous_tag == 0: 106 if self.__in_dangerous_tag == 0:
107 if self.__in_dangerous_but_required_tag == 0: 107 if self.__in_dangerous_but_required_tag == 0:
108 self.__textrepr += original_tag 108 self.__textrepr += original_tag
109 109
110 if tag in self.tag_required_blacklist: 110 if tag in self.tag_required_blocklist:
111 self.__in_dangerous_but_required_tag += 1 111 self.__in_dangerous_but_required_tag += 1
112 112
113 def handle_endtag(self, tag: str): 113 def handle_endtag(self, tag: str):
@@ -123,7 +123,7 @@ class _HTMLParser(parser.HTMLParser):
123 "tag %s in %s" % 123 "tag %s in %s" %
124 (tag, previous_tag, self.filename)) 124 (tag, previous_tag, self.filename))
125 125
126 if tag in self.tag_required_blacklist: 126 if tag in self.tag_required_blocklist:
127 self.__in_dangerous_but_required_tag -= 1 127 self.__in_dangerous_but_required_tag -= 1
128 128
129 if self.__in_dangerous_tag == 0: 129 if self.__in_dangerous_tag == 0:
@@ -131,7 +131,7 @@ class _HTMLParser(parser.HTMLParser):
131 # There is no `get_endtag_text()` method :/ 131 # There is no `get_endtag_text()` method :/
132 self.__textrepr += '</' + previous_tag + '>' 132 self.__textrepr += '</' + previous_tag + '>'
133 133
134 if tag in self.tag_blacklist: 134 if tag in self.tag_blocklist:
135 self.__in_dangerous_tag -= 1 135 self.__in_dangerous_tag -= 1
136 136
137 def handle_data(self, data: str): 137 def handle_data(self, data: str):
@@ -141,14 +141,14 @@ class _HTMLParser(parser.HTMLParser):
141 self.__textrepr += escape(data) 141 self.__textrepr += escape(data)
142 142
143 def handle_startendtag(self, tag: str, attrs: List[Tuple[str, str]]): 143 def handle_startendtag(self, tag: str, attrs: List[Tuple[str, str]]):
144 if tag in self.tag_required_blacklist | self.tag_blacklist: 144 if tag in self.tag_required_blocklist | self.tag_blocklist:
145 meta = {k:v for k, v in attrs} 145 meta = {k:v for k, v in attrs}
146 name = meta.get('name', 'harmful metadata') 146 name = meta.get('name', 'harmful metadata')
147 content = meta.get('content', 'harmful data') 147 content = meta.get('content', 'harmful data')
148 self.__meta[name] = content 148 self.__meta[name] = content
149 149
150 if self.__in_dangerous_tag == 0: 150 if self.__in_dangerous_tag == 0:
151 if tag in self.tag_required_blacklist: 151 if tag in self.tag_required_blocklist:
152 self.__textrepr += '<' + tag + ' />' 152 self.__textrepr += '<' + tag + ' />'
153 return 153 return
154 154