diff options
| author | jvoisin | 2018-07-09 01:11:44 +0200 |
|---|---|---|
| committer | jvoisin | 2018-07-09 01:11:44 +0200 |
| commit | 080d6769ca22cd797d02d601c71f51d3eba6ee64 (patch) | |
| tree | e874a4ab89e012fb8782fec6a6ddecf78ae77e38 | |
| parent | 86fe3aa584f872ff92d6b8bf4b933e6794d5cd71 (diff) | |
Make pylint even happier
| -rw-r--r-- | libmat2/office.py | 47 |
1 file changed, 26 insertions, 21 deletions
```diff
diff --git a/libmat2/office.py b/libmat2/office.py
index 75d6744..b3fde7e 100644
--- a/libmat2/office.py
+++ b/libmat2/office.py
```
| @@ -20,18 +20,18 @@ assert Pattern | |||
| 20 | def _parse_xml(full_path: str): | 20 | def _parse_xml(full_path: str): |
| 21 | """ This function parse XML with namespace support. """ | 21 | """ This function parse XML with namespace support. """ |
| 22 | def parse_map(f): # etree support for ns is a bit rough | 22 | def parse_map(f): # etree support for ns is a bit rough |
| 23 | ns_map = dict() | 23 | namespace_map = dict() |
| 24 | for _, (k, v) in ET.iterparse(f, ("start-ns", )): | 24 | for _, (key, value) in ET.iterparse(f, ("start-ns", )): |
| 25 | ns_map[k] = v | 25 | namespace_map[key] = value |
| 26 | return ns_map | 26 | return namespace_map |
| 27 | 27 | ||
| 28 | ns = parse_map(full_path) | 28 | namespace_map = parse_map(full_path) |
| 29 | 29 | ||
| 30 | # Register the namespaces | 30 | # Register the namespaces |
| 31 | for k, v in ns.items(): | 31 | for key, value in namespace_map.items(): |
| 32 | ET.register_namespace(k, v) | 32 | ET.register_namespace(key, value) |
| 33 | 33 | ||
| 34 | return ET.parse(full_path), ns | 34 | return ET.parse(full_path), namespace_map |
| 35 | 35 | ||
| 36 | 36 | ||
| 37 | class ArchiveBasedAbstractParser(abstract.AbstractParser): | 37 | class ArchiveBasedAbstractParser(abstract.AbstractParser): |
| @@ -53,15 +53,18 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser): | |||
| 53 | def _specific_cleanup(self, full_path: str) -> bool: | 53 | def _specific_cleanup(self, full_path: str) -> bool: |
| 54 | """ This method can be used to apply specific treatment | 54 | """ This method can be used to apply specific treatment |
| 55 | to files present in the archive.""" | 55 | to files present in the archive.""" |
| 56 | # pylint: disable=unused-argument,no-self-use | ||
| 56 | return True # pragma: no cover | 57 | return True # pragma: no cover |
| 57 | 58 | ||
| 58 | def _clean_zipinfo(self, zipinfo: zipfile.ZipInfo) -> zipfile.ZipInfo: | 59 | @staticmethod |
| 60 | def _clean_zipinfo(zipinfo: zipfile.ZipInfo) -> zipfile.ZipInfo: | ||
| 59 | zipinfo.create_system = 3 # Linux | 61 | zipinfo.create_system = 3 # Linux |
| 60 | zipinfo.comment = b'' | 62 | zipinfo.comment = b'' |
| 61 | zipinfo.date_time = (1980, 1, 1, 0, 0, 0) | 63 | zipinfo.date_time = (1980, 1, 1, 0, 0, 0) |
| 62 | return zipinfo | 64 | return zipinfo |
| 63 | 65 | ||
| 64 | def _get_zipinfo_meta(self, zipinfo: zipfile.ZipInfo) -> Dict[str, str]: | 66 | @staticmethod |
| 67 | def _get_zipinfo_meta(zipinfo: zipfile.ZipInfo) -> Dict[str, str]: | ||
| 65 | metadata = {} | 68 | metadata = {} |
| 66 | if zipinfo.create_system == 3: | 69 | if zipinfo.create_system == 3: |
| 67 | #metadata['create_system'] = 'Linux' | 70 | #metadata['create_system'] = 'Linux' |
| @@ -142,29 +145,30 @@ class MSOfficeParser(ArchiveBasedAbstractParser): | |||
| 142 | '^docProps/', | 145 | '^docProps/', |
| 143 | })) | 146 | })) |
| 144 | 147 | ||
| 145 | def __remove_revisions(self, full_path: str) -> bool: | 148 | @staticmethod |
| 149 | def __remove_revisions(full_path: str) -> bool: | ||
| 146 | """ In this function, we're changing the XML | 150 | """ In this function, we're changing the XML |
| 147 | document in two times, since we don't want | 151 | document in two times, since we don't want |
| 148 | to change the tree we're iterating on.""" | 152 | to change the tree we're iterating on.""" |
| 149 | try: | 153 | try: |
| 150 | tree, ns = _parse_xml(full_path) | 154 | tree, namespace = _parse_xml(full_path) |
| 151 | except ET.ParseError: | 155 | except ET.ParseError: |
| 152 | return False | 156 | return False |
| 153 | 157 | ||
| 154 | # No revisions are present | 158 | # No revisions are present |
| 155 | del_presence = tree.find('.//w:del', ns) | 159 | del_presence = tree.find('.//w:del', namespace) |
| 156 | ins_presence = tree.find('.//w:ins', ns) | 160 | ins_presence = tree.find('.//w:ins', namespace) |
| 157 | if del_presence is None and ins_presence is None: | 161 | if del_presence is None and ins_presence is None: |
| 158 | return True | 162 | return True |
| 159 | 163 | ||
| 160 | parent_map = {c:p for p in tree.iter() for c in p} | 164 | parent_map = {c:p for p in tree.iter() for c in p} |
| 161 | 165 | ||
| 162 | elements = list([element for element in tree.iterfind('.//w:del', ns)]) | 166 | elements = list([element for element in tree.iterfind('.//w:del', namespace)]) |
| 163 | for element in elements: | 167 | for element in elements: |
| 164 | parent_map[element].remove(element) | 168 | parent_map[element].remove(element) |
| 165 | 169 | ||
| 166 | elements = list() | 170 | elements = list() |
| 167 | for element in tree.iterfind('.//w:ins', ns): | 171 | for element in tree.iterfind('.//w:ins', namespace): |
| 168 | for position, item in enumerate(tree.iter()): #pragma: no cover | 172 | for position, item in enumerate(tree.iter()): #pragma: no cover |
| 169 | if item == element: | 173 | if item == element: |
| 170 | for children in element.iterfind('./*'): | 174 | for children in element.iterfind('./*'): |
| @@ -231,17 +235,18 @@ class LibreOfficeParser(ArchiveBasedAbstractParser): | |||
| 231 | })) | 235 | })) |
| 232 | 236 | ||
| 233 | 237 | ||
| 234 | def __remove_revisions(self, full_path: str) -> bool: | 238 | @staticmethod |
| 239 | def __remove_revisions(full_path: str) -> bool: | ||
| 235 | try: | 240 | try: |
| 236 | tree, ns = _parse_xml(full_path) | 241 | tree, namespace = _parse_xml(full_path) |
| 237 | except ET.ParseError: | 242 | except ET.ParseError: |
| 238 | return False | 243 | return False |
| 239 | 244 | ||
| 240 | if 'office' not in ns.keys(): # no revisions in the current file | 245 | if 'office' not in namespace.keys(): # no revisions in the current file |
| 241 | return True | 246 | return True |
| 242 | 247 | ||
| 243 | for text in tree.getroot().iterfind('.//office:text', ns): | 248 | for text in tree.getroot().iterfind('.//office:text', namespace): |
| 244 | for changes in text.iterfind('.//text:tracked-changes', ns): | 249 | for changes in text.iterfind('.//text:tracked-changes', namespace): |
| 245 | text.remove(changes) | 250 | text.remove(changes) |
| 246 | 251 | ||
| 247 | tree.write(full_path, xml_declaration=True) | 252 | tree.write(full_path, xml_declaration=True) |
