summary | refs | log | tree | commit | diff
path: root/libmat2/web.py
diff options
context:
space:
mode:
Diffstat (limited to 'libmat2/web.py')
-rw-r--r-- libmat2/web.py 16
1 files changed, 8 insertions, 8 deletions
diff --git a/libmat2/web.py b/libmat2/web.py
index 574bdd7..f2938e2 100644
--- a/libmat2/web.py
+++ b/libmat2/web.py
@@ -1,5 +1,5 @@
1from html import parser, escape 1from html import parser, escape
2from typing import Any, Optional 2from typing import Any, Optional, Dict, List, Tuple, Set
3import re 3import re
4import string 4import string
5 5
@@ -25,7 +25,7 @@ class CSSParser(abstract.AbstractParser):
25 f.write(cleaned) 25 f.write(cleaned)
26 return True 26 return True
27 27
28 def get_meta(self) -> dict[str, Any]: 28 def get_meta(self) -> Dict[str, Any]:
29 metadata = {} 29 metadata = {}
30 with open(self.filename, encoding='utf-8') as f: 30 with open(self.filename, encoding='utf-8') as f:
31 try: 31 try:
@@ -44,10 +44,10 @@ class CSSParser(abstract.AbstractParser):
44 44
45 45
46class AbstractHTMLParser(abstract.AbstractParser): 46class AbstractHTMLParser(abstract.AbstractParser):
47 tags_blocklist = set() # type: set[str] 47 tags_blocklist = set() # type: Set[str]
48 # In some html/xml-based formats some tags are mandatory, 48 # In some html/xml-based formats some tags are mandatory,
49 # so we're keeping them, but are discarding their content 49 # so we're keeping them, but are discarding their content
50 tags_required_blocklist = set() # type: set[str] 50 tags_required_blocklist = set() # type: Set[str]
51 51
52 def __init__(self, filename): 52 def __init__(self, filename):
53 super().__init__(filename) 53 super().__init__(filename)
@@ -57,7 +57,7 @@ class AbstractHTMLParser(abstract.AbstractParser):
57 self.__parser.feed(f.read()) 57 self.__parser.feed(f.read())
58 self.__parser.close() 58 self.__parser.close()
59 59
60 def get_meta(self) -> dict[str, Any]: 60 def get_meta(self) -> Dict[str, Any]:
61 return self.__parser.get_meta() 61 return self.__parser.get_meta()
62 62
63 def remove_all(self) -> bool: 63 def remove_all(self) -> bool:
@@ -112,7 +112,7 @@ class _HTMLParser(parser.HTMLParser):
112 """ 112 """
113 raise ValueError(message) 113 raise ValueError(message)
114 114
115 def handle_starttag(self, tag: str, attrs: list[tuple[str, Optional[str]]]): 115 def handle_starttag(self, tag: str, attrs: List[Tuple[str, Optional[str]]]):
116 # Ignore the type, because mypy is too stupid to infer 116 # Ignore the type, because mypy is too stupid to infer
117 # that get_starttag_text() can't return None. 117 # that get_starttag_text() can't return None.
118 original_tag = self.get_starttag_text() # type: ignore 118 original_tag = self.get_starttag_text() # type: ignore
@@ -159,7 +159,7 @@ class _HTMLParser(parser.HTMLParser):
159 self.__textrepr += escape(data) 159 self.__textrepr += escape(data)
160 160
161 def handle_startendtag(self, tag: str, 161 def handle_startendtag(self, tag: str,
162 attrs: list[tuple[str, Optional[str]]]): 162 attrs: List[Tuple[str, Optional[str]]]):
163 if tag in self.tag_required_blocklist | self.tag_blocklist: 163 if tag in self.tag_required_blocklist | self.tag_blocklist:
164 meta = {k:v for k, v in attrs} 164 meta = {k:v for k, v in attrs}
165 name = meta.get('name', 'harmful metadata') 165 name = meta.get('name', 'harmful metadata')
@@ -184,7 +184,7 @@ class _HTMLParser(parser.HTMLParser):
184 f.write(self.__textrepr) 184 f.write(self.__textrepr)
185 return True 185 return True
186 186
187 def get_meta(self) -> dict[str, Any]: 187 def get_meta(self) -> Dict[str, Any]:
188 if self.__validation_queue: 188 if self.__validation_queue:
189 raise ValueError("Some tags (%s) were left unclosed in %s" % ( 189 raise ValueError("Some tags (%s) were left unclosed in %s" % (
190 ', '.join(self.__validation_queue), 190 ', '.join(self.__validation_queue),