summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorjvoisin2011-08-05 11:42:40 +0200
committerjvoisin2011-08-05 11:42:40 +0200
commit503e926812d35032ed527c81e78444f362a5d527 (patch)
tree599cd3940e3883b8fc275fb924d5f2ca0ae34a07 /lib
parent943b0faf21883089827d976c48a231293c091640 (diff)
Support of openxml office format
Diffstat (limited to 'lib')
-rw-r--r--lib/archive.py2
-rw-r--r--lib/office.py43
2 files changed, 44 insertions, 1 deletions
diff --git a/lib/archive.py b/lib/archive.py
index 5956a1e..fb15705 100644
--- a/lib/archive.py
+++ b/lib/archive.py
@@ -97,7 +97,7 @@ harmless format' % item.filename)
97 _, ext = os.path.splitext(name) 97 _, ext = os.path.splitext(name)
98 bname = os.path.basename(item.filename) 98 bname = os.path.basename(item.filename)
99 if ext not in parser.NOMETA: 99 if ext not in parser.NOMETA:
100 if bname != 'mimetype': 100 if bname != 'mimetype' and bname != '.rels':
101 return False 101 return False
102 mat.secure_remove(name) 102 mat.secure_remove(name)
103 zipin.close() 103 zipin.close()
diff --git a/lib/office.py b/lib/office.py
index 6fdcf2d..b7c607f 100644
--- a/lib/office.py
+++ b/lib/office.py
@@ -188,6 +188,49 @@ class OpenXmlStripper(archive.GenericArchiveStripper):
188 It contains mostly xml, but can have media blobs, crap, ... 188 It contains mostly xml, but can have media blobs, crap, ...
189 (I don't like this format.) 189 (I don't like this format.)
190 ''' 190 '''
def _remove_all(self, method):
    '''
        Rebuild the archive self.filename into self.output, without
        its metadata, then call self.do_backup().

        Every member whose path starts with "docProps/" is dropped.
        Members whose extension is listed in parser.NOMETA, and
        members named ".rels", are copied unchanged. Every other
        regular file is cleaned through mat.create_class_file()
        before being copied; when that cleaning fails, the member is
        only kept if self.add2archive is set.

        method: 'normal' calls remove_all() on each member, any
        other value calls remove_all_ugly().

        FIXME ?
        There is a patch implementing the Zipfile.remove()
        method here : http://bugs.python.org/issue6818
    '''
    zipin = zipfile.ZipFile(self.filename, 'r')
    try:
        zipout = zipfile.ZipFile(self.output, 'w', allowZip64=True)
        try:
            for item in zipin.namelist():
                if item.startswith('docProps/'):  # metadatas
                    continue

                name = os.path.join(self.tempdir, item)
                _, ext = os.path.splitext(name)
                zipin.extract(item, self.tempdir)

                # Compare the basename, not the whole member path:
                # a ".rels" file stored in a sub-directory of the
                # archive would otherwise never match.
                if (ext in parser.NOMETA
                        or os.path.basename(item) == '.rels'):
                    # keep parser.NOMETA files, and the ".rels" ones
                    zipout.write(name, item)
                    mat.secure_remove(name)
                elif os.path.isfile(name):
                    try:
                        cfile = mat.create_class_file(name, False,
                                                      self.add2archive)
                        if method == 'normal':
                            cfile.remove_all()
                        else:
                            cfile.remove_all_ugly()
                        logging.debug('Processing %s from %s', item,
                                      self.filename)
                        zipout.write(name, item)
                    except Exception:
                        # Best effort: an unsupported member must not
                        # abort the whole archive, but Ctrl-C and
                        # SystemExit are no longer swallowed.
                        logging.info("%s' fileformat is not supported",
                                     item)
                        if self.add2archive:
                            zipout.write(name, item)
                    mat.secure_remove(name)
            # The archive comment must be bytes on Python 3; b'' is
            # a plain str on Python 2, so this works on both.
            zipout.comment = b''
        finally:
            # Always release the handles, even when a member fails.
            zipout.close()
    finally:
        zipin.close()
    logging.info('%s treated', self.filename)
    self.do_backup()
233
191 def is_clean(self): 234 def is_clean(self):
192 ''' 235 '''
193 Check if the file is clean from harmful metadatas 236 Check if the file is clean from harmful metadatas