summaryrefslogtreecommitdiff
path: root/libmat2/office.py
diff options
context:
space:
mode:
author Daniel Kahn Gillmor 2018-08-31 15:25:46 -0400
committer Daniel Kahn Gillmor 2018-09-04 16:13:33 -0400
commit 4192a2daa3a24674740ed38913ff40309d6c1a31 (patch)
tree a36073283181959a8bf33c4307f6c2424eefd209 /libmat2/office.py
parent 9ce458cb3bd88cddd2ca09f0571c0f7cf7572f54 (diff)
office: create policy for what to do about unknown members
Previously, encountering an unknown member meant that any parser of this type would abort. Now, the user can set parser.unknown_member_policy to either 'omit' or 'keep' if they don't want the current action of 'abort'. Note that this causes pylint to complain about branching depth for remove_all() because of the nuanced error-handling; I've disabled this check.
Diffstat (limited to 'libmat2/office.py')
-rw-r--r-- libmat2/office.py 33
1 file changed, 25 insertions, 8 deletions
diff --git a/libmat2/office.py b/libmat2/office.py
index 36b7378..23249b4 100644
--- a/libmat2/office.py
+++ b/libmat2/office.py
@@ -40,6 +40,10 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
40 # no matter if they are supported or not. 40 # no matter if they are supported or not.
41 files_to_omit = set() # type: Set[Pattern] 41 files_to_omit = set() # type: Set[Pattern]
42 42
43 # what should the parser do if it encounters an unknown file in
44 # the archive? valid policies are 'abort', 'omit', 'keep'
45 unknown_member_policy = 'abort' # type: str
46
43 def __init__(self, filename): 47 def __init__(self, filename):
44 super().__init__(filename) 48 super().__init__(filename)
45 try: # better fail here than later 49 try: # better fail here than later
@@ -79,6 +83,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
79 return metadata 83 return metadata
80 84
81 def remove_all(self) -> bool: 85 def remove_all(self) -> bool:
86 # pylint: disable=too-many-branches
82 with zipfile.ZipFile(self.filename) as zin,\ 87 with zipfile.ZipFile(self.filename) as zin,\
83 zipfile.ZipFile(self.output_filename, 'w') as zout: 88 zipfile.ZipFile(self.output_filename, 'w') as zout:
84 89
@@ -107,14 +112,26 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
107 # supported files that we want to clean then add 112 # supported files that we want to clean then add
108 tmp_parser, mtype = parser_factory.get_parser(full_path) # type: ignore 113 tmp_parser, mtype = parser_factory.get_parser(full_path) # type: ignore
109 if not tmp_parser: 114 if not tmp_parser:
110 shutil.rmtree(temp_folder) 115 if self.unknown_member_policy == 'omit':
111 os.remove(self.output_filename) 116 logging.warning("In file %s, omitting unknown element %s (format: %s)",
112 logging.error("In file %s, element %s's format (%s) " + 117 self.filename, item.filename, mtype)
113 "isn't supported", 118 continue
114 self.filename, item.filename, mtype) 119 elif self.unknown_member_policy == 'keep':
115 return False 120 logging.warning("In file %s, keeping unknown element %s (format: %s)",
116 tmp_parser.remove_all() 121 self.filename, item.filename, mtype)
117 os.rename(tmp_parser.output_filename, full_path) 122 else:
123 if self.unknown_member_policy != 'abort':
124 logging.warning("Invalid unknown_member_policy %s, " +
125 "treating as 'abort'", self.unknown_member_policy)
126 shutil.rmtree(temp_folder)
127 os.remove(self.output_filename)
128 logging.error("In file %s, element %s's format (%s) " +
129 "isn't supported",
130 self.filename, item.filename, mtype)
131 return False
132 if tmp_parser:
133 tmp_parser.remove_all()
134 os.rename(tmp_parser.output_filename, full_path)
118 135
119 zinfo = zipfile.ZipInfo(item.filename) # type: ignore 136 zinfo = zipfile.ZipInfo(item.filename) # type: ignore
120 clean_zinfo = self._clean_zipinfo(zinfo) 137 clean_zinfo = self._clean_zipinfo(zinfo)