# Method of OpenXmlStripper (lib/office.py), reconstructed from the commit
# "Support of openxml office format". The same commit makes lib/archive.py
# accept, when checking an archive, a member whose basename is '.rels'.
def _remove_all(self, method):
    '''
        Write a metadata-free copy of self.filename to self.output.

        Each member of the zip container is handled in turn:
          - members under docProps/ are dropped;
          - members whose extension is in parser.NOMETA, and the
            member named ".rels", are copied untouched;
          - any other regular file is cleaned through the object
            returned by mat.create_class_file() and then copied.
            If that fails, the member is copied as-is only when
            self.add2archive is set, and left out otherwise.

        method: 'normal' calls remove_all() on each member,
                any other value calls remove_all_ugly().

        Both archives are closed even if an error occurs, and every
        extracted temporary file is passed to mat.secure_remove().
        self.do_backup() is called once the output is complete.

        FIXME ?
        There is a patch implementing the Zipfile.remove()
        method here : http://bugs.python.org/issue6818
    '''
    zipin = zipfile.ZipFile(self.filename, 'r')
    try:
        zipout = zipfile.ZipFile(self.output, 'w', allowZip64=True)
        try:
            for item in zipin.namelist():
                if item.startswith('docProps/'):
                    # metadata members: never copied to the output
                    continue

                name = os.path.join(self.tempdir, item)
                zipin.extract(item, self.tempdir)

                # namelist() yields full paths inside the archive, so
                # compare the basename, as the whitelist in archive.py
                # does; comparing `item` itself only matched a ".rels"
                # stored at the root of the archive.
                keep_as_is = (os.path.splitext(name)[1] in parser.NOMETA
                              or os.path.basename(item) == '.rels')

                if not keep_as_is and not os.path.isfile(name):
                    # not a regular file once extracted (e.g. a
                    # directory entry): nothing to clean or to copy
                    continue

                try:
                    if keep_as_is:
                        zipout.write(name, item)
                    else:
                        try:
                            cfile = mat.create_class_file(name, False,
                                                          self.add2archive)
                            if method == 'normal':
                                cfile.remove_all()
                            else:
                                cfile.remove_all_ugly()
                            logging.debug('Processing %s from %s', item,
                                          self.filename)
                            zipout.write(name, item)
                        except Exception:
                            # Deliberately best-effort: a member that
                            # cannot be cleaned must not abort the whole
                            # document. KeyboardInterrupt and SystemExit
                            # are no longer swallowed.
                            logging.info('%s\' fileformat is not supported',
                                         item)
                            if self.add2archive:
                                zipout.write(name, item)
                finally:
                    # the extracted copy may still hold metadata: always
                    # shred it, even when writing to the output failed
                    mat.secure_remove(name)

            zipout.comment = ''
        finally:
            zipout.close()
    finally:
        zipin.close()

    logging.info('%s treated', self.filename)
    self.do_backup()