diff options
Diffstat (limited to 'libmat2/web.py')
| -rw-r--r-- | libmat2/web.py | 16 |
1 file changed, 8 insertions, 8 deletions
diff --git a/libmat2/web.py b/libmat2/web.py
index 574bdd7..f2938e2 100644
--- a/libmat2/web.py
+++ b/libmat2/web.py
| @@ -1,5 +1,5 @@ | |||
| 1 | from html import parser, escape | 1 | from html import parser, escape |
| 2 | from typing import Any, Optional | 2 | from typing import Any, Optional, Dict, List, Tuple, Set |
| 3 | import re | 3 | import re |
| 4 | import string | 4 | import string |
| 5 | 5 | ||
| @@ -25,7 +25,7 @@ class CSSParser(abstract.AbstractParser): | |||
| 25 | f.write(cleaned) | 25 | f.write(cleaned) |
| 26 | return True | 26 | return True |
| 27 | 27 | ||
| 28 | def get_meta(self) -> dict[str, Any]: | 28 | def get_meta(self) -> Dict[str, Any]: |
| 29 | metadata = {} | 29 | metadata = {} |
| 30 | with open(self.filename, encoding='utf-8') as f: | 30 | with open(self.filename, encoding='utf-8') as f: |
| 31 | try: | 31 | try: |
| @@ -44,10 +44,10 @@ class CSSParser(abstract.AbstractParser): | |||
| 44 | 44 | ||
| 45 | 45 | ||
| 46 | class AbstractHTMLParser(abstract.AbstractParser): | 46 | class AbstractHTMLParser(abstract.AbstractParser): |
| 47 | tags_blocklist = set() # type: set[str] | 47 | tags_blocklist = set() # type: Set[str] |
| 48 | # In some html/xml-based formats some tags are mandatory, | 48 | # In some html/xml-based formats some tags are mandatory, |
| 49 | # so we're keeping them, but are discarding their content | 49 | # so we're keeping them, but are discarding their content |
| 50 | tags_required_blocklist = set() # type: set[str] | 50 | tags_required_blocklist = set() # type: Set[str] |
| 51 | 51 | ||
| 52 | def __init__(self, filename): | 52 | def __init__(self, filename): |
| 53 | super().__init__(filename) | 53 | super().__init__(filename) |
| @@ -57,7 +57,7 @@ class AbstractHTMLParser(abstract.AbstractParser): | |||
| 57 | self.__parser.feed(f.read()) | 57 | self.__parser.feed(f.read()) |
| 58 | self.__parser.close() | 58 | self.__parser.close() |
| 59 | 59 | ||
| 60 | def get_meta(self) -> dict[str, Any]: | 60 | def get_meta(self) -> Dict[str, Any]: |
| 61 | return self.__parser.get_meta() | 61 | return self.__parser.get_meta() |
| 62 | 62 | ||
| 63 | def remove_all(self) -> bool: | 63 | def remove_all(self) -> bool: |
| @@ -112,7 +112,7 @@ class _HTMLParser(parser.HTMLParser): | |||
| 112 | """ | 112 | """ |
| 113 | raise ValueError(message) | 113 | raise ValueError(message) |
| 114 | 114 | ||
| 115 | def handle_starttag(self, tag: str, attrs: list[tuple[str, Optional[str]]]): | 115 | def handle_starttag(self, tag: str, attrs: List[Tuple[str, Optional[str]]]): |
| 116 | # Ignore the type, because mypy is too stupid to infer | 116 | # Ignore the type, because mypy is too stupid to infer |
| 117 | # that get_starttag_text() can't return None. | 117 | # that get_starttag_text() can't return None. |
| 118 | original_tag = self.get_starttag_text() # type: ignore | 118 | original_tag = self.get_starttag_text() # type: ignore |
| @@ -159,7 +159,7 @@ class _HTMLParser(parser.HTMLParser): | |||
| 159 | self.__textrepr += escape(data) | 159 | self.__textrepr += escape(data) |
| 160 | 160 | ||
| 161 | def handle_startendtag(self, tag: str, | 161 | def handle_startendtag(self, tag: str, |
| 162 | attrs: list[tuple[str, Optional[str]]]): | 162 | attrs: List[Tuple[str, Optional[str]]]): |
| 163 | if tag in self.tag_required_blocklist | self.tag_blocklist: | 163 | if tag in self.tag_required_blocklist | self.tag_blocklist: |
| 164 | meta = {k:v for k, v in attrs} | 164 | meta = {k:v for k, v in attrs} |
| 165 | name = meta.get('name', 'harmful metadata') | 165 | name = meta.get('name', 'harmful metadata') |
| @@ -184,7 +184,7 @@ class _HTMLParser(parser.HTMLParser): | |||
| 184 | f.write(self.__textrepr) | 184 | f.write(self.__textrepr) |
| 185 | return True | 185 | return True |
| 186 | 186 | ||
| 187 | def get_meta(self) -> dict[str, Any]: | 187 | def get_meta(self) -> Dict[str, Any]: |
| 188 | if self.__validation_queue: | 188 | if self.__validation_queue: |
| 189 | raise ValueError("Some tags (%s) were left unclosed in %s" % ( | 189 | raise ValueError("Some tags (%s) were left unclosed in %s" % ( |
| 190 | ', '.join(self.__validation_queue), | 190 | ', '.join(self.__validation_queue), |
