summary | refs | log | tree | commit | diff
path: root/libmat2/web.py
diff options
context:
space:
mode:
author: jvoisin, 2022-08-28 22:29:06 +0200
committer: jvoisin, 2022-08-28 22:29:06 +0200
commit: cc5be8608b49d74a633b80a95a49a018d4dcd477 (patch)
tree: 322c21ba2543831d5a1804ebce50a3f7c2391029 /libmat2/web.py
parent: 292f44c0861a57b54a289641ead7e59f158e307e (diff)
Simplify the typing annotations
Diffstat (limited to 'libmat2/web.py')
-rw-r--r-- libmat2/web.py 19
1 files changed, 9 insertions, 10 deletions
diff --git a/libmat2/web.py b/libmat2/web.py
index 2f331d8..d324996 100644
--- a/libmat2/web.py
+++ b/libmat2/web.py
@@ -1,11 +1,10 @@
1from html import parser, escape 1from html import parser, escape
2from typing import Dict, Any, List, Tuple, Set, Optional 2from typing import Any, Optional
3import re 3import re
4import string 4import string
5 5
6from . import abstract 6from . import abstract
7 7
8assert Set
9 8
10# pylint: disable=too-many-instance-attributes 9# pylint: disable=too-many-instance-attributes
11 10
@@ -26,7 +25,7 @@ class CSSParser(abstract.AbstractParser):
26 f.write(cleaned) 25 f.write(cleaned)
27 return True 26 return True
28 27
29 def get_meta(self) -> Dict[str, Any]: 28 def get_meta(self) -> dict[str, Any]:
30 metadata = {} 29 metadata = {}
31 with open(self.filename, encoding='utf-8') as f: 30 with open(self.filename, encoding='utf-8') as f:
32 try: 31 try:
@@ -45,10 +44,10 @@ class CSSParser(abstract.AbstractParser):
45 44
46 45
47class AbstractHTMLParser(abstract.AbstractParser): 46class AbstractHTMLParser(abstract.AbstractParser):
48 tags_blocklist = set() # type: Set[str] 47 tags_blocklist = set() # type: set[str]
49 # In some html/xml-based formats some tags are mandatory, 48 # In some html/xml-based formats some tags are mandatory,
50 # so we're keeping them, but are discarding their content 49 # so we're keeping them, but are discarding their content
51 tags_required_blocklist = set() # type: Set[str] 50 tags_required_blocklist = set() # type: set[str]
52 51
53 def __init__(self, filename): 52 def __init__(self, filename):
54 super().__init__(filename) 53 super().__init__(filename)
@@ -58,7 +57,7 @@ class AbstractHTMLParser(abstract.AbstractParser):
58 self.__parser.feed(f.read()) 57 self.__parser.feed(f.read())
59 self.__parser.close() 58 self.__parser.close()
60 59
61 def get_meta(self) -> Dict[str, Any]: 60 def get_meta(self) -> dict[str, Any]:
62 return self.__parser.get_meta() 61 return self.__parser.get_meta()
63 62
64 def remove_all(self) -> bool: 63 def remove_all(self) -> bool:
@@ -92,7 +91,7 @@ class _HTMLParser(parser.HTMLParser):
92 self.filename = filename 91 self.filename = filename
93 self.__textrepr = '' 92 self.__textrepr = ''
94 self.__meta = {} 93 self.__meta = {}
95 self.__validation_queue = [] # type: List[str] 94 self.__validation_queue = [] # type: list[str]
96 95
97 # We're using counters instead of booleans, to handle nested tags 96 # We're using counters instead of booleans, to handle nested tags
98 self.__in_dangerous_but_required_tag = 0 97 self.__in_dangerous_but_required_tag = 0
@@ -114,7 +113,7 @@ class _HTMLParser(parser.HTMLParser):
114 """ 113 """
115 raise ValueError(message) 114 raise ValueError(message)
116 115
117 def handle_starttag(self, tag: str, attrs: List[Tuple[str, Optional[str]]]): 116 def handle_starttag(self, tag: str, attrs: list[tuple[str, Optional[str]]]):
118 # Ignore the type, because mypy is too stupid to infer 117 # Ignore the type, because mypy is too stupid to infer
119 # that get_starttag_text() can't return None. 118 # that get_starttag_text() can't return None.
120 original_tag = self.get_starttag_text() # type: ignore 119 original_tag = self.get_starttag_text() # type: ignore
@@ -161,7 +160,7 @@ class _HTMLParser(parser.HTMLParser):
161 self.__textrepr += escape(data) 160 self.__textrepr += escape(data)
162 161
163 def handle_startendtag(self, tag: str, 162 def handle_startendtag(self, tag: str,
164 attrs: List[Tuple[str, Optional[str]]]): 163 attrs: list[tuple[str, Optional[str]]]):
165 if tag in self.tag_required_blocklist | self.tag_blocklist: 164 if tag in self.tag_required_blocklist | self.tag_blocklist:
166 meta = {k:v for k, v in attrs} 165 meta = {k:v for k, v in attrs}
167 name = meta.get('name', 'harmful metadata') 166 name = meta.get('name', 'harmful metadata')
@@ -186,7 +185,7 @@ class _HTMLParser(parser.HTMLParser):
186 f.write(self.__textrepr) 185 f.write(self.__textrepr)
187 return True 186 return True
188 187
189 def get_meta(self) -> Dict[str, Any]: 188 def get_meta(self) -> dict[str, Any]:
190 if self.__validation_queue: 189 if self.__validation_queue:
191 raise ValueError("Some tags (%s) were left unclosed in %s" % ( 190 raise ValueError("Some tags (%s) were left unclosed in %s" % (
192 ', '.join(self.__validation_queue), 191 ', '.join(self.__validation_queue),