summaryrefslogtreecommitdiff
path: root/libmat2
diff options
context:
space:
mode:
Diffstat (limited to 'libmat2')
-rw-r--r--libmat2/office.py33
1 files changed, 33 insertions, 0 deletions
diff --git a/libmat2/office.py b/libmat2/office.py
index 3a290d8..f182277 100644
--- a/libmat2/office.py
+++ b/libmat2/office.py
@@ -323,6 +323,39 @@ class MSOfficeParser(ZipParser):
323 tree.write(full_path, xml_declaration=True, encoding='utf-8') 323 tree.write(full_path, xml_declaration=True, encoding='utf-8')
324 return True 324 return True
325 325
@staticmethod
def __remove_document_comment_meta(full_path: str) -> bool:
    """Drop the comment marker elements from the XML file at `full_path`.

    Every `w:commentRangeStart`, `w:commentRangeEnd` and
    `w:commentReference` element found in the parsed tree is detached
    from its parent, then the tree is written back to `full_path`.
    If none of those elements exist, nothing is written.

    Returns False when the file can't be parsed, True otherwise.
    """
    try:
        tree, namespace = _parse_xml(full_path)
    except ET.ParseError as e:  # pragma: no cover
        logging.error("Unable to parse %s: %s", full_path, e)
        return False

    marker_paths = (
        './/w:commentRangeStart',
        './/w:commentRangeEnd',
        './/w:commentReference',
    )

    # probe for a first match of each kind, to leave early (and skip
    # rewriting the file) when there is nothing to strip
    first_hits = [tree.find(path, namespace) for path in marker_paths]
    if all(hit is None for hit in first_hits):
        return True  # No comment meta tags are present

    # elements don't know their parent, so record child -> parent links
    parent_of = {}
    for parent in tree.iter():
        for child in parent:
            parent_of[child] = parent

    # gather every match up front, so that the tree isn't modified
    # while it's still being searched
    doomed = [
        node
        for path in marker_paths
        for node in tree.iterfind(path, namespace)
    ]

    for node in doomed:
        parent_of[node].remove(node)

    tree.write(full_path, xml_declaration=True, encoding='utf-8')
    return True
358
326 def __remove_content_type_members(self, full_path: str) -> bool: 359 def __remove_content_type_members(self, full_path: str) -> bool:
327 """ The method will remove the dangling references 360 """ The method will remove the dangling references
328 form the [Content_Types].xml file, since MS office doesn't like them 361 form the [Content_Types].xml file, since MS office doesn't like them