summary | refs | log | tree | commit | diff
path: root/libmat2/office.py
diff options
context:
space:
mode:
author: jvoisin, 2018-07-09 01:11:44 +0200
committer: jvoisin, 2018-07-09 01:11:44 +0200
commit: 080d6769ca22cd797d02d601c71f51d3eba6ee64 (patch)
tree: e874a4ab89e012fb8782fec6a6ddecf78ae77e38 /libmat2/office.py
parent: 86fe3aa584f872ff92d6b8bf4b933e6794d5cd71 (diff)
Make pylint even happier
Diffstat (limited to 'libmat2/office.py')
-rw-r--r-- libmat2/office.py 47
1 files changed, 26 insertions, 21 deletions
diff --git a/libmat2/office.py b/libmat2/office.py
index 75d6744..b3fde7e 100644
--- a/libmat2/office.py
+++ b/libmat2/office.py
@@ -20,18 +20,18 @@ assert Pattern
20def _parse_xml(full_path: str): 20def _parse_xml(full_path: str):
21 """ This function parse XML with namespace support. """ 21 """ This function parse XML with namespace support. """
22 def parse_map(f): # etree support for ns is a bit rough 22 def parse_map(f): # etree support for ns is a bit rough
23 ns_map = dict() 23 namespace_map = dict()
24 for _, (k, v) in ET.iterparse(f, ("start-ns", )): 24 for _, (key, value) in ET.iterparse(f, ("start-ns", )):
25 ns_map[k] = v 25 namespace_map[key] = value
26 return ns_map 26 return namespace_map
27 27
28 ns = parse_map(full_path) 28 namespace_map = parse_map(full_path)
29 29
30 # Register the namespaces 30 # Register the namespaces
31 for k, v in ns.items(): 31 for key, value in namespace_map.items():
32 ET.register_namespace(k, v) 32 ET.register_namespace(key, value)
33 33
34 return ET.parse(full_path), ns 34 return ET.parse(full_path), namespace_map
35 35
36 36
37class ArchiveBasedAbstractParser(abstract.AbstractParser): 37class ArchiveBasedAbstractParser(abstract.AbstractParser):
@@ -53,15 +53,18 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
53 def _specific_cleanup(self, full_path: str) -> bool: 53 def _specific_cleanup(self, full_path: str) -> bool:
54 """ This method can be used to apply specific treatment 54 """ This method can be used to apply specific treatment
55 to files present in the archive.""" 55 to files present in the archive."""
56 # pylint: disable=unused-argument,no-self-use
56 return True # pragma: no cover 57 return True # pragma: no cover
57 58
58 def _clean_zipinfo(self, zipinfo: zipfile.ZipInfo) -> zipfile.ZipInfo: 59 @staticmethod
60 def _clean_zipinfo(zipinfo: zipfile.ZipInfo) -> zipfile.ZipInfo:
59 zipinfo.create_system = 3 # Linux 61 zipinfo.create_system = 3 # Linux
60 zipinfo.comment = b'' 62 zipinfo.comment = b''
61 zipinfo.date_time = (1980, 1, 1, 0, 0, 0) 63 zipinfo.date_time = (1980, 1, 1, 0, 0, 0)
62 return zipinfo 64 return zipinfo
63 65
64 def _get_zipinfo_meta(self, zipinfo: zipfile.ZipInfo) -> Dict[str, str]: 66 @staticmethod
67 def _get_zipinfo_meta(zipinfo: zipfile.ZipInfo) -> Dict[str, str]:
65 metadata = {} 68 metadata = {}
66 if zipinfo.create_system == 3: 69 if zipinfo.create_system == 3:
67 #metadata['create_system'] = 'Linux' 70 #metadata['create_system'] = 'Linux'
@@ -142,29 +145,30 @@ class MSOfficeParser(ArchiveBasedAbstractParser):
142 '^docProps/', 145 '^docProps/',
143 })) 146 }))
144 147
145 def __remove_revisions(self, full_path: str) -> bool: 148 @staticmethod
149 def __remove_revisions(full_path: str) -> bool:
146 """ In this function, we're changing the XML 150 """ In this function, we're changing the XML
147 document in two times, since we don't want 151 document in two times, since we don't want
148 to change the tree we're iterating on.""" 152 to change the tree we're iterating on."""
149 try: 153 try:
150 tree, ns = _parse_xml(full_path) 154 tree, namespace = _parse_xml(full_path)
151 except ET.ParseError: 155 except ET.ParseError:
152 return False 156 return False
153 157
154 # No revisions are present 158 # No revisions are present
155 del_presence = tree.find('.//w:del', ns) 159 del_presence = tree.find('.//w:del', namespace)
156 ins_presence = tree.find('.//w:ins', ns) 160 ins_presence = tree.find('.//w:ins', namespace)
157 if del_presence is None and ins_presence is None: 161 if del_presence is None and ins_presence is None:
158 return True 162 return True
159 163
160 parent_map = {c:p for p in tree.iter() for c in p} 164 parent_map = {c:p for p in tree.iter() for c in p}
161 165
162 elements = list([element for element in tree.iterfind('.//w:del', ns)]) 166 elements = list([element for element in tree.iterfind('.//w:del', namespace)])
163 for element in elements: 167 for element in elements:
164 parent_map[element].remove(element) 168 parent_map[element].remove(element)
165 169
166 elements = list() 170 elements = list()
167 for element in tree.iterfind('.//w:ins', ns): 171 for element in tree.iterfind('.//w:ins', namespace):
168 for position, item in enumerate(tree.iter()): #pragma: no cover 172 for position, item in enumerate(tree.iter()): #pragma: no cover
169 if item == element: 173 if item == element:
170 for children in element.iterfind('./*'): 174 for children in element.iterfind('./*'):
@@ -231,17 +235,18 @@ class LibreOfficeParser(ArchiveBasedAbstractParser):
231 })) 235 }))
232 236
233 237
234 def __remove_revisions(self, full_path: str) -> bool: 238 @staticmethod
239 def __remove_revisions(full_path: str) -> bool:
235 try: 240 try:
236 tree, ns = _parse_xml(full_path) 241 tree, namespace = _parse_xml(full_path)
237 except ET.ParseError: 242 except ET.ParseError:
238 return False 243 return False
239 244
240 if 'office' not in ns.keys(): # no revisions in the current file 245 if 'office' not in namespace.keys(): # no revisions in the current file
241 return True 246 return True
242 247
243 for text in tree.getroot().iterfind('.//office:text', ns): 248 for text in tree.getroot().iterfind('.//office:text', namespace):
244 for changes in text.iterfind('.//text:tracked-changes', ns): 249 for changes in text.iterfind('.//text:tracked-changes', namespace):
245 text.remove(changes) 250 text.remove(changes)
246 251
247 tree.write(full_path, xml_declaration=True) 252 tree.write(full_path, xml_declaration=True)