summary | refs | log | tree | commit | diff
path: root/libmat2/office.py
diff options
context:
space:
mode:
author: jvoisin, 2023-01-28 15:57:20 +0000
committer: jvoisin, 2023-01-28 15:57:20 +0000
commit: 39fb254e019c920516bb317d4b48a8de7cac850e (patch)
tree: 5a5397fe318b8b73f6ebfdb9d1d6c0b64bbda0a3 /libmat2/office.py
parent: 1f73a16ef36d1a8e771a0b3695818d18e095486b (diff)
Fix the type annotations
Diffstat (limited to 'libmat2/office.py')
-rw-r--r-- libmat2/office.py 15
1 files changed, 6 insertions, 9 deletions
diff --git a/libmat2/office.py b/libmat2/office.py
index 8ccaa02..87a0b7e 100644
--- a/libmat2/office.py
+++ b/libmat2/office.py
@@ -4,7 +4,7 @@ import logging
4import os 4import os
5import re 5import re
6import zipfile 6import zipfile
7from typing import Pattern, Any 7from typing import Pattern, Any, Tuple, Dict
8 8
9import xml.etree.ElementTree as ET # type: ignore 9import xml.etree.ElementTree as ET # type: ignore
10 10
@@ -12,7 +12,8 @@ from .archive import ZipParser
12 12
13# pylint: disable=line-too-long 13# pylint: disable=line-too-long
14 14
15def _parse_xml(full_path: str) -> tuple[ET.ElementTree, dict[str, str]]: 15
16def _parse_xml(full_path: str) -> Tuple[ET.ElementTree, Dict[str, str]]:
16 """ This function parses XML, with namespace support. """ 17 """ This function parses XML, with namespace support. """
17 namespace_map = dict() 18 namespace_map = dict()
18 for _, (key, value) in ET.iterparse(full_path, ("start-ns", )): 19 for _, (key, value) in ET.iterparse(full_path, ("start-ns", )):
@@ -68,7 +69,6 @@ class MSOfficeParser(ZipParser):
68 'application/vnd.openxmlformats-officedocument.wordprocessingml.settings+xml', 69 'application/vnd.openxmlformats-officedocument.wordprocessingml.settings+xml',
69 } 70 }
70 71
71
72 def __init__(self, filename): 72 def __init__(self, filename):
73 super().__init__(filename) 73 super().__init__(filename)
74 74
@@ -218,7 +218,7 @@ class MSOfficeParser(ZipParser):
218 if 'w' not in namespace: 218 if 'w' not in namespace:
219 return True 219 return True
220 220
221 parent_map = {c:p for p in tree.iter() for c in p} 221 parent_map = {c: p for p in tree.iter() for c in p}
222 222
223 elements_to_remove = list() 223 elements_to_remove = list()
224 for element in tree.iterfind('.//w:nsid', namespace): 224 for element in tree.iterfind('.//w:nsid', namespace):
@@ -229,7 +229,6 @@ class MSOfficeParser(ZipParser):
229 tree.write(full_path, xml_declaration=True) 229 tree.write(full_path, xml_declaration=True)
230 return True 230 return True
231 231
232
233 @staticmethod 232 @staticmethod
234 def __remove_revisions(full_path: str) -> bool: 233 def __remove_revisions(full_path: str) -> bool:
235 try: 234 try:
@@ -319,7 +318,6 @@ class MSOfficeParser(ZipParser):
319 for i in re.findall(r'<p:cNvPr id="([0-9]+)"', content): 318 for i in re.findall(r'<p:cNvPr id="([0-9]+)"', content):
320 self.__counters['cNvPr'].add(int(i)) 319 self.__counters['cNvPr'].add(int(i))
321 320
322
323 @staticmethod 321 @staticmethod
324 def __randomize_creationId(full_path: str) -> bool: 322 def __randomize_creationId(full_path: str) -> bool:
325 try: 323 try:
@@ -441,8 +439,8 @@ class MSOfficeParser(ZipParser):
441 439
442 with open(full_path, encoding='utf-8') as f: 440 with open(full_path, encoding='utf-8') as f:
443 try: 441 try:
444 results = re.findall(r"<(.+)>(.+)</\1>", f.read(), re.I|re.M) 442 results = re.findall(r"<(.+)>(.+)</\1>", f.read(), re.I | re.M)
445 return {k:v for (k, v) in results} 443 return {k: v for (k, v) in results}
446 except (TypeError, UnicodeDecodeError): 444 except (TypeError, UnicodeDecodeError):
447 # We didn't manage to parse the xml file 445 # We didn't manage to parse the xml file
448 return {file_path: 'harmful content', } 446 return {file_path: 'harmful content', }
@@ -459,7 +457,6 @@ class LibreOfficeParser(ZipParser):
459 'application/vnd.oasis.opendocument.image', 457 'application/vnd.oasis.opendocument.image',
460 } 458 }
461 459
462
463 def __init__(self, filename): 460 def __init__(self, filename):
464 super().__init__(filename) 461 super().__init__(filename)
465 462