From 4e44cba5d6b65415f145ba3ded191c07a8c9c660 Mon Sep 17 00:00:00 2001 From: jvoisin Date: Sat, 30 Jul 2011 22:15:38 +0200 Subject: Outputted OpenDocument files do not carry any traces of possible previous metadata --- lib/office.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/office.py b/lib/office.py index 2320e40..ba12295 100644 --- a/lib/office.py +++ b/lib/office.py @@ -6,6 +6,7 @@ import os import logging import zipfile import re +import fileinput from xml.etree import ElementTree try: @@ -54,17 +55,23 @@ class OpenDocumentStripper(archive.GenericArchiveStripper): allowZip64=True) for item in zipin.namelist(): name = os.path.join(self.tempdir, item) - if item.endswith('.xml') or item == 'mimetype': + if item.endswith('manifest.xml'): + zipin.extract(item, self.tempdir) + for line in fileinput.input(name, inplace=1): + #remove the line which contains "meta.xml" + line = line.strip() + if not 'meta.xml' in line: + print line + zipout.write(name, item) + mat.secure_remove(name) + + elif item.endswith('.xml') or item == 'mimetype': #keep .xml files, and the "manifest" file if item != 'meta.xml': # contains the metadata zipin.extract(item, self.tempdir) zipout.write(name, item) mat.secure_remove(name) - elif item.endswith('manifest.xml'): - zipin.extract(item, self.tempdir) - #remove line meta.xml - zipout.write(name, item) - mat.secure_remove(name) + else: zipin.extract(item, self.tempdir) if os.path.isfile(name): -- cgit v1.3