diff options
Diffstat (limited to 'libmat2')
| -rw-r--r-- | libmat2/office.py | 33 |
1 files changed, 33 insertions, 0 deletions
diff --git a/libmat2/office.py b/libmat2/office.py index 3a290d8..f182277 100644 --- a/libmat2/office.py +++ b/libmat2/office.py | |||
| @@ -323,6 +323,39 @@ class MSOfficeParser(ZipParser): | |||
| 323 | tree.write(full_path, xml_declaration=True, encoding='utf-8') | 323 | tree.write(full_path, xml_declaration=True, encoding='utf-8') |
| 324 | return True | 324 | return True |
| 325 | 325 | ||
@staticmethod
def __remove_document_comment_meta(full_path: str) -> bool:
    """ Remove the comment anchors from the XML file at `full_path`.

    Every `w:commentRangeStart`, `w:commentRangeEnd` and
    `w:commentReference` element is detached from its parent, and the
    file is then rewritten in place.

    Return False if the file can't be parsed, True otherwise. The file
    is left untouched when it contains none of those elements.
    """
    try:
        tree, namespace = _parse_xml(full_path)
    except ET.ParseError as e:  # pragma: no cover
        logging.error("Unable to parse %s: %s", full_path, e)
        return False

    # Collect everything to delete in a single pass per tag; an empty
    # result doubles as the "nothing to do" check.
    comment_tags = ('commentRangeStart', 'commentRangeEnd', 'commentReference')
    elements_del = [
        element
        for tag in comment_tags
        for element in tree.iterfind('.//w:' + tag, namespace)
    ]
    if not elements_del:
        return True  # No comment meta tags are present

    # Elements don't keep a reference to their parent, so build a
    # child -> parent lookup before removing anything.
    parent_map = {child: parent for parent in tree.iter() for child in parent}
    for element in elements_del:
        parent_map[element].remove(element)

    tree.write(full_path, xml_declaration=True, encoding='utf-8')
    return True
| 358 | |||
| 326 | def __remove_content_type_members(self, full_path: str) -> bool: | 359 | def __remove_content_type_members(self, full_path: str) -> bool: |
| 327 | """ The method will remove the dangling references | 360 | """ The method will remove the dangling references |
| 328 | form the [Content_Types].xml file, since MS office doesn't like them | 361 | form the [Content_Types].xml file, since MS office doesn't like them |
