summary | refs | log | tree | commit | diff
path: root/libmat2/office.py
diff options
context:
space:
mode:
author: jvoisin, 2018-10-05 17:00:59 +0200
committer: jvoisin, 2018-10-05 17:07:58 +0200
commit: 0d25b18d266b1cd546294fee8ba735831f9a7fef (patch)
tree: 9cac68778ece89d246232ad1c4a1db3abcd58999 /libmat2/office.py
parent: d0f3534efffb057e0f6a5decde70d96b6c98cab8 (diff)
Improve both the typing and the comments
Diffstat (limited to 'libmat2/office.py')
-rw-r--r-- libmat2/office.py | 17
1 file changed, 8 insertions, 9 deletions
diff --git a/libmat2/office.py b/libmat2/office.py
index 54347ea..32e7b75 100644
--- a/libmat2/office.py
+++ b/libmat2/office.py
@@ -2,7 +2,7 @@ import logging
2import os 2import os
3import re 3import re
4import zipfile 4import zipfile
5from typing import Dict, Set, Pattern 5from typing import Dict, Set, Pattern, Tuple
6 6
7import xml.etree.ElementTree as ET # type: ignore 7import xml.etree.ElementTree as ET # type: ignore
8 8
@@ -14,9 +14,8 @@ from .archive import ArchiveBasedAbstractParser
14assert Set 14assert Set
15assert Pattern 15assert Pattern
16 16
17def _parse_xml(full_path: str): 17def _parse_xml(full_path: str) -> Tuple[ET.ElementTree, Dict[str, str]]:
18 """ This function parses XML, with namespace support. """ 18 """ This function parses XML, with namespace support. """
19
20 namespace_map = dict() 19 namespace_map = dict()
21 for _, (key, value) in ET.iterparse(full_path, ("start-ns", )): 20 for _, (key, value) in ET.iterparse(full_path, ("start-ns", )):
22 # The ns[0-9]+ namespaces are reserved for internal usage, so 21 # The ns[0-9]+ namespaces are reserved for internal usage, so
@@ -183,20 +182,20 @@ class MSOfficeParser(ArchiveBasedAbstractParser):
183 182
184 parent_map = {c:p for p in tree.iter() for c in p} 183 parent_map = {c:p for p in tree.iter() for c in p}
185 184
186 elements = list() 185 elements_del = list()
187 for element in tree.iterfind('.//w:del', namespace): 186 for element in tree.iterfind('.//w:del', namespace):
188 elements.append(element) 187 elements_del.append(element)
189 for element in elements: 188 for element in elements_del:
190 parent_map[element].remove(element) 189 parent_map[element].remove(element)
191 190
192 elements = list() 191 elements_ins = list()
193 for element in tree.iterfind('.//w:ins', namespace): 192 for element in tree.iterfind('.//w:ins', namespace):
194 for position, item in enumerate(tree.iter()): # pragma: no cover 193 for position, item in enumerate(tree.iter()): # pragma: no cover
195 if item == element: 194 if item == element:
196 for children in element.iterfind('./*'): 195 for children in element.iterfind('./*'):
197 elements.append((element, position, children)) 196 elements_ins.append((element, position, children))
198 break 197 break
199 for (element, position, children) in elements: 198 for (element, position, children) in elements_ins:
200 parent_map[element].insert(position, children) 199 parent_map[element].insert(position, children)
201 parent_map[element].remove(element) 200 parent_map[element].remove(element)
202 201