diff options
Diffstat (limited to 'libmat2')
| -rw-r--r-- | libmat2/exiftool.py | 4 | ||||
| -rw-r--r-- | libmat2/images.py | 8 | ||||
| -rw-r--r-- | libmat2/office.py | 6 | ||||
| -rw-r--r-- | libmat2/torrent.py | 6 | ||||
| -rw-r--r-- | libmat2/video.py | 18 | ||||
| -rw-r--r-- | libmat2/web.py | 36 |
6 files changed, 39 insertions, 39 deletions
diff --git a/libmat2/exiftool.py b/libmat2/exiftool.py index db92f60..d18b1fb 100644 --- a/libmat2/exiftool.py +++ b/libmat2/exiftool.py | |||
| @@ -15,14 +15,14 @@ class ExiftoolParser(abstract.AbstractParser): | |||
| 15 | from a import file, hence why several parsers are re-using its `get_meta` | 15 | from a import file, hence why several parsers are re-using its `get_meta` |
| 16 | method. | 16 | method. |
| 17 | """ | 17 | """ |
| 18 | meta_whitelist = set() # type: Set[str] | 18 | meta_allowlist = set() # type: Set[str] |
| 19 | 19 | ||
| 20 | def get_meta(self) -> Dict[str, Union[str, dict]]: | 20 | def get_meta(self) -> Dict[str, Union[str, dict]]: |
| 21 | out = subprocess.run([_get_exiftool_path(), '-json', self.filename], | 21 | out = subprocess.run([_get_exiftool_path(), '-json', self.filename], |
| 22 | input_filename=self.filename, | 22 | input_filename=self.filename, |
| 23 | check=True, stdout=subprocess.PIPE).stdout | 23 | check=True, stdout=subprocess.PIPE).stdout |
| 24 | meta = json.loads(out.decode('utf-8'))[0] | 24 | meta = json.loads(out.decode('utf-8'))[0] |
| 25 | for key in self.meta_whitelist: | 25 | for key in self.meta_allowlist: |
| 26 | meta.pop(key, None) | 26 | meta.pop(key, None) |
| 27 | return meta | 27 | return meta |
| 28 | 28 | ||
diff --git a/libmat2/images.py b/libmat2/images.py index dd3be53..32a329f 100644 --- a/libmat2/images.py +++ b/libmat2/images.py | |||
| @@ -15,7 +15,7 @@ assert Set | |||
| 15 | 15 | ||
| 16 | class PNGParser(exiftool.ExiftoolParser): | 16 | class PNGParser(exiftool.ExiftoolParser): |
| 17 | mimetypes = {'image/png', } | 17 | mimetypes = {'image/png', } |
| 18 | meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName', | 18 | meta_allowlist = {'SourceFile', 'ExifToolVersion', 'FileName', |
| 19 | 'Directory', 'FileSize', 'FileModifyDate', | 19 | 'Directory', 'FileSize', 'FileModifyDate', |
| 20 | 'FileAccessDate', 'FileInodeChangeDate', | 20 | 'FileAccessDate', 'FileInodeChangeDate', |
| 21 | 'FilePermissions', 'FileType', 'FileTypeExtension', | 21 | 'FilePermissions', 'FileType', 'FileTypeExtension', |
| @@ -44,7 +44,7 @@ class PNGParser(exiftool.ExiftoolParser): | |||
| 44 | 44 | ||
| 45 | class GIFParser(exiftool.ExiftoolParser): | 45 | class GIFParser(exiftool.ExiftoolParser): |
| 46 | mimetypes = {'image/gif'} | 46 | mimetypes = {'image/gif'} |
| 47 | meta_whitelist = {'AnimationIterations', 'BackgroundColor', 'BitsPerPixel', | 47 | meta_allowlist = {'AnimationIterations', 'BackgroundColor', 'BitsPerPixel', |
| 48 | 'ColorResolutionDepth', 'Directory', 'Duration', | 48 | 'ColorResolutionDepth', 'Directory', 'Duration', |
| 49 | 'ExifToolVersion', 'FileAccessDate', | 49 | 'ExifToolVersion', 'FileAccessDate', |
| 50 | 'FileInodeChangeDate', 'FileModifyDate', 'FileName', | 50 | 'FileInodeChangeDate', 'FileModifyDate', 'FileName', |
| @@ -86,7 +86,7 @@ class GdkPixbufAbstractParser(exiftool.ExiftoolParser): | |||
| 86 | class JPGParser(GdkPixbufAbstractParser): | 86 | class JPGParser(GdkPixbufAbstractParser): |
| 87 | _type = 'jpeg' | 87 | _type = 'jpeg' |
| 88 | mimetypes = {'image/jpeg'} | 88 | mimetypes = {'image/jpeg'} |
| 89 | meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName', | 89 | meta_allowlist = {'SourceFile', 'ExifToolVersion', 'FileName', |
| 90 | 'Directory', 'FileSize', 'FileModifyDate', | 90 | 'Directory', 'FileSize', 'FileModifyDate', |
| 91 | 'FileAccessDate', "FileInodeChangeDate", | 91 | 'FileAccessDate', "FileInodeChangeDate", |
| 92 | 'FilePermissions', 'FileType', 'FileTypeExtension', | 92 | 'FilePermissions', 'FileType', 'FileTypeExtension', |
| @@ -99,7 +99,7 @@ class JPGParser(GdkPixbufAbstractParser): | |||
| 99 | class TiffParser(GdkPixbufAbstractParser): | 99 | class TiffParser(GdkPixbufAbstractParser): |
| 100 | _type = 'tiff' | 100 | _type = 'tiff' |
| 101 | mimetypes = {'image/tiff'} | 101 | mimetypes = {'image/tiff'} |
| 102 | meta_whitelist = {'Compression', 'ExifByteOrder', 'ExtraSamples', | 102 | meta_allowlist = {'Compression', 'ExifByteOrder', 'ExtraSamples', |
| 103 | 'FillOrder', 'PhotometricInterpretation', | 103 | 'FillOrder', 'PhotometricInterpretation', |
| 104 | 'PlanarConfiguration', 'RowsPerStrip', 'SamplesPerPixel', | 104 | 'PlanarConfiguration', 'RowsPerStrip', 'SamplesPerPixel', |
| 105 | 'StripByteCounts', 'StripOffsets', 'BitsPerSample', | 105 | 'StripByteCounts', 'StripOffsets', 'BitsPerSample', |
diff --git a/libmat2/office.py b/libmat2/office.py index f3a5b22..2c9cbff 100644 --- a/libmat2/office.py +++ b/libmat2/office.py | |||
| @@ -89,7 +89,7 @@ class MSOfficeParser(ArchiveBasedAbstractParser): | |||
| 89 | r'^word/theme', | 89 | r'^word/theme', |
| 90 | r'^word/people\.xml$', | 90 | r'^word/people\.xml$', |
| 91 | 91 | ||
| 92 | # we have a whitelist in self.files_to_keep, | 92 | # we have an allowlist in self.files_to_keep, |
| 93 | # so we can trash everything else | 93 | # so we can trash everything else |
| 94 | r'^word/_rels/', | 94 | r'^word/_rels/', |
| 95 | })) | 95 | })) |
| @@ -100,7 +100,7 @@ class MSOfficeParser(ArchiveBasedAbstractParser): | |||
| 100 | def __fill_files_to_keep_via_content_types(self) -> bool: | 100 | def __fill_files_to_keep_via_content_types(self) -> bool: |
| 101 | """ There is a super-handy `[Content_Types].xml` file | 101 | """ There is a super-handy `[Content_Types].xml` file |
| 102 | in MS Office archives, describing what each other file contains. | 102 | in MS Office archives, describing what each other file contains. |
| 103 | The self.content_types_to_keep member contains a type whitelist, | 103 | The self.content_types_to_keep member contains a type allowlist, |
| 104 | so we're using it to fill the self.files_to_keep one. | 104 | so we're using it to fill the self.files_to_keep one. |
| 105 | """ | 105 | """ |
| 106 | with zipfile.ZipFile(self.filename) as zin: | 106 | with zipfile.ZipFile(self.filename) as zin: |
| @@ -220,7 +220,7 @@ class MSOfficeParser(ArchiveBasedAbstractParser): | |||
| 220 | for file_to_omit in self.files_to_omit: | 220 | for file_to_omit in self.files_to_omit: |
| 221 | if file_to_omit.search(fname): | 221 | if file_to_omit.search(fname): |
| 222 | matches = map(lambda r: r.search(fname), self.files_to_keep) | 222 | matches = map(lambda r: r.search(fname), self.files_to_keep) |
| 223 | if any(matches): # the file is whitelisted | 223 | if any(matches): # the file is in the allowlist |
| 224 | continue | 224 | continue |
| 225 | removed_fnames.add(fname) | 225 | removed_fnames.add(fname) |
| 226 | break | 226 | break |
diff --git a/libmat2/torrent.py b/libmat2/torrent.py index c006f9c..6021d75 100644 --- a/libmat2/torrent.py +++ b/libmat2/torrent.py | |||
| @@ -6,7 +6,7 @@ from . import abstract | |||
| 6 | 6 | ||
| 7 | class TorrentParser(abstract.AbstractParser): | 7 | class TorrentParser(abstract.AbstractParser): |
| 8 | mimetypes = {'application/x-bittorrent', } | 8 | mimetypes = {'application/x-bittorrent', } |
| 9 | whitelist = {b'announce', b'announce-list', b'info'} | 9 | allowlist = {b'announce', b'announce-list', b'info'} |
| 10 | 10 | ||
| 11 | def __init__(self, filename): | 11 | def __init__(self, filename): |
| 12 | super().__init__(filename) | 12 | super().__init__(filename) |
| @@ -18,14 +18,14 @@ class TorrentParser(abstract.AbstractParser): | |||
| 18 | def get_meta(self) -> Dict[str, Union[str, dict]]: | 18 | def get_meta(self) -> Dict[str, Union[str, dict]]: |
| 19 | metadata = {} | 19 | metadata = {} |
| 20 | for key, value in self.dict_repr.items(): | 20 | for key, value in self.dict_repr.items(): |
| 21 | if key not in self.whitelist: | 21 | if key not in self.allowlist: |
| 22 | metadata[key.decode('utf-8')] = value | 22 | metadata[key.decode('utf-8')] = value |
| 23 | return metadata | 23 | return metadata |
| 24 | 24 | ||
| 25 | def remove_all(self) -> bool: | 25 | def remove_all(self) -> bool: |
| 26 | cleaned = dict() | 26 | cleaned = dict() |
| 27 | for key, value in self.dict_repr.items(): | 27 | for key, value in self.dict_repr.items(): |
| 28 | if key in self.whitelist: | 28 | if key in self.allowlist: |
| 29 | cleaned[key] = value | 29 | cleaned[key] = value |
| 30 | with open(self.output_filename, 'wb') as f: | 30 | with open(self.output_filename, 'wb') as f: |
| 31 | f.write(_BencodeHandler().bencode(cleaned)) | 31 | f.write(_BencodeHandler().bencode(cleaned)) |
diff --git a/libmat2/video.py b/libmat2/video.py index 4f15b19..0060f78 100644 --- a/libmat2/video.py +++ b/libmat2/video.py | |||
| @@ -10,10 +10,10 @@ from . import subprocess | |||
| 10 | class AbstractFFmpegParser(exiftool.ExiftoolParser): | 10 | class AbstractFFmpegParser(exiftool.ExiftoolParser): |
| 11 | """ Abstract parser for all FFmpeg-based ones, mainly for video. """ | 11 | """ Abstract parser for all FFmpeg-based ones, mainly for video. """ |
| 12 | # Some fileformats have mandatory metadata fields | 12 | # Some fileformats have mandatory metadata fields |
| 13 | meta_key_value_whitelist = {} # type: Dict[str, Union[str, int]] | 13 | meta_key_value_allowlist = {} # type: Dict[str, Union[str, int]] |
| 14 | 14 | ||
| 15 | def remove_all(self) -> bool: | 15 | def remove_all(self) -> bool: |
| 16 | if self.meta_key_value_whitelist: | 16 | if self.meta_key_value_allowlist: |
| 17 | logging.warning('The format of "%s" (%s) has some mandatory ' | 17 | logging.warning('The format of "%s" (%s) has some mandatory ' |
| 18 | 'metadata fields; mat2 filled them with standard ' | 18 | 'metadata fields; mat2 filled them with standard ' |
| 19 | 'data.', self.filename, ', '.join(self.mimetypes)) | 19 | 'data.', self.filename, ', '.join(self.mimetypes)) |
| @@ -45,8 +45,8 @@ class AbstractFFmpegParser(exiftool.ExiftoolParser): | |||
| 45 | 45 | ||
| 46 | ret = dict() # type: Dict[str, Union[str, dict]] | 46 | ret = dict() # type: Dict[str, Union[str, dict]] |
| 47 | for key, value in meta.items(): | 47 | for key, value in meta.items(): |
| 48 | if key in self.meta_key_value_whitelist.keys(): | 48 | if key in self.meta_key_value_allowlist.keys(): |
| 49 | if value == self.meta_key_value_whitelist[key]: | 49 | if value == self.meta_key_value_allowlist[key]: |
| 50 | continue | 50 | continue |
| 51 | ret[key] = value | 51 | ret[key] = value |
| 52 | return ret | 52 | return ret |
| @@ -54,7 +54,7 @@ class AbstractFFmpegParser(exiftool.ExiftoolParser): | |||
| 54 | 54 | ||
| 55 | class WMVParser(AbstractFFmpegParser): | 55 | class WMVParser(AbstractFFmpegParser): |
| 56 | mimetypes = {'video/x-ms-wmv', } | 56 | mimetypes = {'video/x-ms-wmv', } |
| 57 | meta_whitelist = {'AudioChannels', 'AudioCodecID', 'AudioCodecName', | 57 | meta_allowlist = {'AudioChannels', 'AudioCodecID', 'AudioCodecName', |
| 58 | 'ErrorCorrectionType', 'AudioSampleRate', 'DataPackets', | 58 | 'ErrorCorrectionType', 'AudioSampleRate', 'DataPackets', |
| 59 | 'Directory', 'Duration', 'ExifToolVersion', | 59 | 'Directory', 'Duration', 'ExifToolVersion', |
| 60 | 'FileAccessDate', 'FileInodeChangeDate', 'FileLength', | 60 | 'FileAccessDate', 'FileInodeChangeDate', 'FileLength', |
| @@ -64,7 +64,7 @@ class WMVParser(AbstractFFmpegParser): | |||
| 64 | 'ImageWidth', 'MIMEType', 'MaxBitrate', 'MaxPacketSize', | 64 | 'ImageWidth', 'MIMEType', 'MaxBitrate', 'MaxPacketSize', |
| 65 | 'Megapixels', 'MinPacketSize', 'Preroll', 'SendDuration', | 65 | 'Megapixels', 'MinPacketSize', 'Preroll', 'SendDuration', |
| 66 | 'SourceFile', 'StreamNumber', 'VideoCodecName', } | 66 | 'SourceFile', 'StreamNumber', 'VideoCodecName', } |
| 67 | meta_key_value_whitelist = { # some metadata are mandatory :/ | 67 | meta_key_value_allowlist = { # some metadata are mandatory :/ |
| 68 | 'AudioCodecDescription': '', | 68 | 'AudioCodecDescription': '', |
| 69 | 'CreationDate': '0000:00:00 00:00:00Z', | 69 | 'CreationDate': '0000:00:00 00:00:00Z', |
| 70 | 'FileID': '00000000-0000-0000-0000-000000000000', | 70 | 'FileID': '00000000-0000-0000-0000-000000000000', |
| @@ -78,7 +78,7 @@ class WMVParser(AbstractFFmpegParser): | |||
| 78 | 78 | ||
| 79 | class AVIParser(AbstractFFmpegParser): | 79 | class AVIParser(AbstractFFmpegParser): |
| 80 | mimetypes = {'video/x-msvideo', } | 80 | mimetypes = {'video/x-msvideo', } |
| 81 | meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName', 'Directory', | 81 | meta_allowlist = {'SourceFile', 'ExifToolVersion', 'FileName', 'Directory', |
| 82 | 'FileSize', 'FileModifyDate', 'FileAccessDate', | 82 | 'FileSize', 'FileModifyDate', 'FileAccessDate', |
| 83 | 'FileInodeChangeDate', 'FilePermissions', 'FileType', | 83 | 'FileInodeChangeDate', 'FilePermissions', 'FileType', |
| 84 | 'FileTypeExtension', 'MIMEType', 'FrameRate', 'MaxDataRate', | 84 | 'FileTypeExtension', 'MIMEType', 'FrameRate', 'MaxDataRate', |
| @@ -98,7 +98,7 @@ class AVIParser(AbstractFFmpegParser): | |||
| 98 | 98 | ||
| 99 | class MP4Parser(AbstractFFmpegParser): | 99 | class MP4Parser(AbstractFFmpegParser): |
| 100 | mimetypes = {'video/mp4', } | 100 | mimetypes = {'video/mp4', } |
| 101 | meta_whitelist = {'AudioFormat', 'AvgBitrate', 'Balance', 'TrackDuration', | 101 | meta_allowlist = {'AudioFormat', 'AvgBitrate', 'Balance', 'TrackDuration', |
| 102 | 'XResolution', 'YResolution', 'ExifToolVersion', | 102 | 'XResolution', 'YResolution', 'ExifToolVersion', |
| 103 | 'FileAccessDate', 'FileInodeChangeDate', 'FileModifyDate', | 103 | 'FileAccessDate', 'FileInodeChangeDate', 'FileModifyDate', |
| 104 | 'FileName', 'FilePermissions', 'MIMEType', 'FileType', | 104 | 'FileName', 'FilePermissions', 'MIMEType', 'FileType', |
| @@ -109,7 +109,7 @@ class MP4Parser(AbstractFFmpegParser): | |||
| 109 | 'MovieDataSize', 'VideoFrameRate', 'MediaTimeScale', | 109 | 'MovieDataSize', 'VideoFrameRate', 'MediaTimeScale', |
| 110 | 'SourceImageHeight', 'SourceImageWidth', | 110 | 'SourceImageHeight', 'SourceImageWidth', |
| 111 | 'MatrixStructure', 'MediaDuration'} | 111 | 'MatrixStructure', 'MediaDuration'} |
| 112 | meta_key_value_whitelist = { # some metadata are mandatory :/ | 112 | meta_key_value_allowlist = { # some metadata are mandatory :/ |
| 113 | 'CreateDate': '0000:00:00 00:00:00', | 113 | 'CreateDate': '0000:00:00 00:00:00', |
| 114 | 'CurrentTime': '0 s', | 114 | 'CurrentTime': '0 s', |
| 115 | 'MediaCreateDate': '0000:00:00 00:00:00', | 115 | 'MediaCreateDate': '0000:00:00 00:00:00', |
diff --git a/libmat2/web.py b/libmat2/web.py index 34426b8..0a61908 100644 --- a/libmat2/web.py +++ b/libmat2/web.py | |||
| @@ -37,15 +37,15 @@ class CSSParser(abstract.AbstractParser): | |||
| 37 | 37 | ||
| 38 | 38 | ||
| 39 | class AbstractHTMLParser(abstract.AbstractParser): | 39 | class AbstractHTMLParser(abstract.AbstractParser): |
| 40 | tags_blacklist = set() # type: Set[str] | 40 | tags_blocklist = set() # type: Set[str] |
| 41 | # In some html/xml-based formats some tags are mandatory, | 41 | # In some html/xml-based formats some tags are mandatory, |
| 42 | # so we're keeping them, but are discarding their content | 42 | # so we're keeping them, but are discarding their content |
| 43 | tags_required_blacklist = set() # type: Set[str] | 43 | tags_required_blocklist = set() # type: Set[str] |
| 44 | 44 | ||
| 45 | def __init__(self, filename): | 45 | def __init__(self, filename): |
| 46 | super().__init__(filename) | 46 | super().__init__(filename) |
| 47 | self.__parser = _HTMLParser(self.filename, self.tags_blacklist, | 47 | self.__parser = _HTMLParser(self.filename, self.tags_blocklist, |
| 48 | self.tags_required_blacklist) | 48 | self.tags_required_blocklist) |
| 49 | with open(filename, encoding='utf-8') as f: | 49 | with open(filename, encoding='utf-8') as f: |
| 50 | self.__parser.feed(f.read()) | 50 | self.__parser.feed(f.read()) |
| 51 | self.__parser.close() | 51 | self.__parser.close() |
| @@ -59,13 +59,13 @@ class AbstractHTMLParser(abstract.AbstractParser): | |||
| 59 | 59 | ||
| 60 | class HTMLParser(AbstractHTMLParser): | 60 | class HTMLParser(AbstractHTMLParser): |
| 61 | mimetypes = {'text/html', } | 61 | mimetypes = {'text/html', } |
| 62 | tags_blacklist = {'meta', } | 62 | tags_blocklist = {'meta', } |
| 63 | tags_required_blacklist = {'title', } | 63 | tags_required_blocklist = {'title', } |
| 64 | 64 | ||
| 65 | 65 | ||
| 66 | class DTBNCXParser(AbstractHTMLParser): | 66 | class DTBNCXParser(AbstractHTMLParser): |
| 67 | mimetypes = {'application/x-dtbncx+xml', } | 67 | mimetypes = {'application/x-dtbncx+xml', } |
| 68 | tags_required_blacklist = {'title', 'doctitle', 'meta'} | 68 | tags_required_blocklist = {'title', 'doctitle', 'meta'} |
| 69 | 69 | ||
| 70 | 70 | ||
| 71 | class _HTMLParser(parser.HTMLParser): | 71 | class _HTMLParser(parser.HTMLParser): |
| @@ -79,7 +79,7 @@ class _HTMLParser(parser.HTMLParser): | |||
| 79 | 79 | ||
| 80 | Also, gotcha: the `tag` parameters are always in lowercase. | 80 | Also, gotcha: the `tag` parameters are always in lowercase. |
| 81 | """ | 81 | """ |
| 82 | def __init__(self, filename, blacklisted_tags, required_blacklisted_tags): | 82 | def __init__(self, filename, blocklisted_tags, required_blocklisted_tags): |
| 83 | super().__init__() | 83 | super().__init__() |
| 84 | self.filename = filename | 84 | self.filename = filename |
| 85 | self.__textrepr = '' | 85 | self.__textrepr = '' |
| @@ -90,24 +90,24 @@ class _HTMLParser(parser.HTMLParser): | |||
| 90 | self.__in_dangerous_but_required_tag = 0 | 90 | self.__in_dangerous_but_required_tag = 0 |
| 91 | self.__in_dangerous_tag = 0 | 91 | self.__in_dangerous_tag = 0 |
| 92 | 92 | ||
| 93 | if required_blacklisted_tags & blacklisted_tags: # pragma: nocover | 93 | if required_blocklisted_tags & blocklisted_tags: # pragma: nocover |
| 94 | raise ValueError("There is an overlap between %s and %s" % ( | 94 | raise ValueError("There is an overlap between %s and %s" % ( |
| 95 | required_blacklisted_tags, blacklisted_tags)) | 95 | required_blocklisted_tags, blocklisted_tags)) |
| 96 | self.tag_required_blacklist = required_blacklisted_tags | 96 | self.tag_required_blocklist = required_blocklisted_tags |
| 97 | self.tag_blacklist = blacklisted_tags | 97 | self.tag_blocklist = blocklisted_tags |
| 98 | 98 | ||
| 99 | def handle_starttag(self, tag: str, attrs: List[Tuple[str, str]]): | 99 | def handle_starttag(self, tag: str, attrs: List[Tuple[str, str]]): |
| 100 | original_tag = self.get_starttag_text() | 100 | original_tag = self.get_starttag_text() |
| 101 | self.__validation_queue.append(original_tag) | 101 | self.__validation_queue.append(original_tag) |
| 102 | 102 | ||
| 103 | if tag in self.tag_blacklist: | 103 | if tag in self.tag_blocklist: |
| 104 | self.__in_dangerous_tag += 1 | 104 | self.__in_dangerous_tag += 1 |
| 105 | 105 | ||
| 106 | if self.__in_dangerous_tag == 0: | 106 | if self.__in_dangerous_tag == 0: |
| 107 | if self.__in_dangerous_but_required_tag == 0: | 107 | if self.__in_dangerous_but_required_tag == 0: |
| 108 | self.__textrepr += original_tag | 108 | self.__textrepr += original_tag |
| 109 | 109 | ||
| 110 | if tag in self.tag_required_blacklist: | 110 | if tag in self.tag_required_blocklist: |
| 111 | self.__in_dangerous_but_required_tag += 1 | 111 | self.__in_dangerous_but_required_tag += 1 |
| 112 | 112 | ||
| 113 | def handle_endtag(self, tag: str): | 113 | def handle_endtag(self, tag: str): |
| @@ -123,7 +123,7 @@ class _HTMLParser(parser.HTMLParser): | |||
| 123 | "tag %s in %s" % | 123 | "tag %s in %s" % |
| 124 | (tag, previous_tag, self.filename)) | 124 | (tag, previous_tag, self.filename)) |
| 125 | 125 | ||
| 126 | if tag in self.tag_required_blacklist: | 126 | if tag in self.tag_required_blocklist: |
| 127 | self.__in_dangerous_but_required_tag -= 1 | 127 | self.__in_dangerous_but_required_tag -= 1 |
| 128 | 128 | ||
| 129 | if self.__in_dangerous_tag == 0: | 129 | if self.__in_dangerous_tag == 0: |
| @@ -131,7 +131,7 @@ class _HTMLParser(parser.HTMLParser): | |||
| 131 | # There is no `get_endtag_text()` method :/ | 131 | # There is no `get_endtag_text()` method :/ |
| 132 | self.__textrepr += '</' + previous_tag + '>' | 132 | self.__textrepr += '</' + previous_tag + '>' |
| 133 | 133 | ||
| 134 | if tag in self.tag_blacklist: | 134 | if tag in self.tag_blocklist: |
| 135 | self.__in_dangerous_tag -= 1 | 135 | self.__in_dangerous_tag -= 1 |
| 136 | 136 | ||
| 137 | def handle_data(self, data: str): | 137 | def handle_data(self, data: str): |
| @@ -141,14 +141,14 @@ class _HTMLParser(parser.HTMLParser): | |||
| 141 | self.__textrepr += escape(data) | 141 | self.__textrepr += escape(data) |
| 142 | 142 | ||
| 143 | def handle_startendtag(self, tag: str, attrs: List[Tuple[str, str]]): | 143 | def handle_startendtag(self, tag: str, attrs: List[Tuple[str, str]]): |
| 144 | if tag in self.tag_required_blacklist | self.tag_blacklist: | 144 | if tag in self.tag_required_blocklist | self.tag_blocklist: |
| 145 | meta = {k:v for k, v in attrs} | 145 | meta = {k:v for k, v in attrs} |
| 146 | name = meta.get('name', 'harmful metadata') | 146 | name = meta.get('name', 'harmful metadata') |
| 147 | content = meta.get('content', 'harmful data') | 147 | content = meta.get('content', 'harmful data') |
| 148 | self.__meta[name] = content | 148 | self.__meta[name] = content |
| 149 | 149 | ||
| 150 | if self.__in_dangerous_tag == 0: | 150 | if self.__in_dangerous_tag == 0: |
| 151 | if tag in self.tag_required_blacklist: | 151 | if tag in self.tag_required_blocklist: |
| 152 | self.__textrepr += '<' + tag + ' />' | 152 | self.__textrepr += '<' + tag + ' />' |
| 153 | return | 153 | return |
| 154 | 154 | ||
