summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author jvoisin 2011-07-30 22:15:38 +0200
committer jvoisin 2011-07-30 22:15:38 +0200
commit 4e44cba5d6b65415f145ba3ded191c07a8c9c660 (patch)
tree 74c56c9d90be6791361f25fa66a943e175d61e9b
parent 5715ba52f2238af513b0b87f4aa3c0158d2c84ba (diff)
Outputted OpenDocument files do not carry any traces of possible previous metadata
-rw-r--r-- lib/office.py 19
1 files changed, 13 insertions, 6 deletions
diff --git a/lib/office.py b/lib/office.py
index 2320e40..ba12295 100644
--- a/lib/office.py
+++ b/lib/office.py
@@ -6,6 +6,7 @@ import os
6import logging 6import logging
7import zipfile 7import zipfile
8import re 8import re
9import fileinput
9from xml.etree import ElementTree 10from xml.etree import ElementTree
10 11
11try: 12try:
@@ -54,17 +55,23 @@ class OpenDocumentStripper(archive.GenericArchiveStripper):
54 allowZip64=True) 55 allowZip64=True)
55 for item in zipin.namelist(): 56 for item in zipin.namelist():
56 name = os.path.join(self.tempdir, item) 57 name = os.path.join(self.tempdir, item)
57 if item.endswith('.xml') or item == 'mimetype': 58 if item.endswith('manifest.xml'):
59 zipin.extract(item, self.tempdir)
60 for line in fileinput.input(name, inplace=1):
61 #remove the line which contains "meta.xml"
62 line = line.strip()
63 if not 'meta.xml' in line:
64 print line
65 zipout.write(name, item)
66 mat.secure_remove(name)
67
68 elif item.endswith('.xml') or item == 'mimetype':
58 #keep .xml files, and the "manifest" file 69 #keep .xml files, and the "manifest" file
59 if item != 'meta.xml': # contains the metadata 70 if item != 'meta.xml': # contains the metadata
60 zipin.extract(item, self.tempdir) 71 zipin.extract(item, self.tempdir)
61 zipout.write(name, item) 72 zipout.write(name, item)
62 mat.secure_remove(name) 73 mat.secure_remove(name)
63 elif item.endswith('manifest.xml'): 74
64 zipin.extract(item, self.tempdir)
65 #remove line meta.xml
66 zipout.write(name, item)
67 mat.secure_remove(name)
68 else: 75 else:
69 zipin.extract(item, self.tempdir) 76 zipin.extract(item, self.tempdir)
70 if os.path.isfile(name): 77 if os.path.isfile(name):