diff options
| author | Daniel Kahn Gillmor | 2018-08-31 15:25:46 -0400 |
|---|---|---|
| committer | Daniel Kahn Gillmor | 2018-09-04 16:13:33 -0400 |
| commit | 4192a2daa3a24674740ed38913ff40309d6c1a31 (patch) | |
| tree | a36073283181959a8bf33c4307f6c2424eefd209 /libmat2/office.py | |
| parent | 9ce458cb3bd88cddd2ca09f0571c0f7cf7572f54 (diff) | |
office: create policy for what to do about unknown members
Previously, encountering an unknown member meant that any parser of
this type would abort.
Now, the user can set parser.unknown_member_policy to either 'omit' or
'keep' if they don't want the default action of 'abort'.
Note that this causes pylint to complain about the number of branches in
remove_all() (too-many-branches) because of the nuanced error handling.
I've disabled this check for that method.
Diffstat (limited to 'libmat2/office.py')
| -rw-r--r-- | libmat2/office.py | 33 |
1 files changed, 25 insertions, 8 deletions
diff --git a/libmat2/office.py b/libmat2/office.py index 36b7378..23249b4 100644 --- a/libmat2/office.py +++ b/libmat2/office.py | |||
| @@ -40,6 +40,10 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser): | |||
| 40 | # no matter if they are supported or not. | 40 | # no matter if they are supported or not. |
| 41 | files_to_omit = set() # type: Set[Pattern] | 41 | files_to_omit = set() # type: Set[Pattern] |
| 42 | 42 | ||
| 43 | # what should the parser do if it encounters an unknown file in | ||
| 44 | # the archive? valid policies are 'abort', 'omit', 'keep' | ||
| 45 | unknown_member_policy = 'abort' # type: str | ||
| 46 | |||
| 43 | def __init__(self, filename): | 47 | def __init__(self, filename): |
| 44 | super().__init__(filename) | 48 | super().__init__(filename) |
| 45 | try: # better fail here than later | 49 | try: # better fail here than later |
| @@ -79,6 +83,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser): | |||
| 79 | return metadata | 83 | return metadata |
| 80 | 84 | ||
| 81 | def remove_all(self) -> bool: | 85 | def remove_all(self) -> bool: |
| 86 | # pylint: disable=too-many-branches | ||
| 82 | with zipfile.ZipFile(self.filename) as zin,\ | 87 | with zipfile.ZipFile(self.filename) as zin,\ |
| 83 | zipfile.ZipFile(self.output_filename, 'w') as zout: | 88 | zipfile.ZipFile(self.output_filename, 'w') as zout: |
| 84 | 89 | ||
| @@ -107,14 +112,26 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser): | |||
| 107 | # supported files that we want to clean then add | 112 | # supported files that we want to clean then add |
| 108 | tmp_parser, mtype = parser_factory.get_parser(full_path) # type: ignore | 113 | tmp_parser, mtype = parser_factory.get_parser(full_path) # type: ignore |
| 109 | if not tmp_parser: | 114 | if not tmp_parser: |
| 110 | shutil.rmtree(temp_folder) | 115 | if self.unknown_member_policy == 'omit': |
| 111 | os.remove(self.output_filename) | 116 | logging.warning("In file %s, omitting unknown element %s (format: %s)", |
| 112 | logging.error("In file %s, element %s's format (%s) " + | 117 | self.filename, item.filename, mtype) |
| 113 | "isn't supported", | 118 | continue |
| 114 | self.filename, item.filename, mtype) | 119 | elif self.unknown_member_policy == 'keep': |
| 115 | return False | 120 | logging.warning("In file %s, keeping unknown element %s (format: %s)", |
| 116 | tmp_parser.remove_all() | 121 | self.filename, item.filename, mtype) |
| 117 | os.rename(tmp_parser.output_filename, full_path) | 122 | else: |
| 123 | if self.unknown_member_policy != 'abort': | ||
| 124 | logging.warning("Invalid unknown_member_policy %s, " + | ||
| 125 | "treating as 'abort'", self.unknown_member_policy) | ||
| 126 | shutil.rmtree(temp_folder) | ||
| 127 | os.remove(self.output_filename) | ||
| 128 | logging.error("In file %s, element %s's format (%s) " + | ||
| 129 | "isn't supported", | ||
| 130 | self.filename, item.filename, mtype) | ||
| 131 | return False | ||
| 132 | if tmp_parser: | ||
| 133 | tmp_parser.remove_all() | ||
| 134 | os.rename(tmp_parser.output_filename, full_path) | ||
| 118 | 135 | ||
| 119 | zinfo = zipfile.ZipInfo(item.filename) # type: ignore | 136 | zinfo = zipfile.ZipInfo(item.filename) # type: ignore |
| 120 | clean_zinfo = self._clean_zipinfo(zinfo) | 137 | clean_zinfo = self._clean_zipinfo(zinfo) |
