summaryrefslogtreecommitdiff
path: root/lib/office.py
diff options
context:
space:
mode:
author jvoisin 2011-08-05 23:15:16 +0200
committer jvoisin 2011-08-05 23:15:16 +0200
commit 2b31addfe8808c4a0b72b66fa36519230e7c9398 (patch)
tree fc0d83eb13cb3331f71dd74c7b55521656c0542a /lib/office.py
parent aa4e6cbf842b09f9b72be5efa0e45ae98a7af248 (diff)
Revert stupid optimisation (always test before commit!), and optimize the office.py processing
Diffstat (limited to 'lib/office.py')
-rw-r--r-- lib/office.py 34
1 files changed, 18 insertions, 16 deletions
diff --git a/lib/office.py b/lib/office.py
index 03e386b..bf50774 100644
--- a/lib/office.py
+++ b/lib/office.py
@@ -27,7 +27,8 @@ class OpenDocumentStripper(archive.GenericArchiveStripper):
27 27
28 def get_meta(self): 28 def get_meta(self):
29 ''' 29 '''
30 Return a dict with all the meta of the file 30 Return a dict with all the meta of the file by
31 trying to read the meta.xml file.
31 ''' 32 '''
32 zipin = zipfile.ZipFile(self.filename, 'r') 33 zipin = zipfile.ZipFile(self.filename, 'r')
33 metadata = {} 34 metadata = {}
@@ -46,11 +47,14 @@ class OpenDocumentStripper(archive.GenericArchiveStripper):
46 method here : http://bugs.python.org/issue6818 47 method here : http://bugs.python.org/issue6818
47 ''' 48 '''
48 zipin = zipfile.ZipFile(self.filename, 'r') 49 zipin = zipfile.ZipFile(self.filename, 'r')
49 zipout = zipfile.ZipFile(self.output, 'w', 50 zipout = zipfile.ZipFile(self.output, 'w', allowZip64=True)
50 allowZip64=True) 51
51 for item in zipin.namelist(): 52 for item in zipin.namelist():
52 name = os.path.join(self.tempdir, item) 53 name = os.path.join(self.tempdir, item)
54 _, ext = os.path.splitext(name)
55
53 if item.endswith('manifest.xml'): 56 if item.endswith('manifest.xml'):
57 # contain the list of all files present in the archive
54 zipin.extract(item, self.tempdir) 58 zipin.extract(item, self.tempdir)
55 for line in fileinput.input(name, inplace=1): 59 for line in fileinput.input(name, inplace=1):
56 #remove the line which contains "meta.xml" 60 #remove the line which contains "meta.xml"
@@ -60,8 +64,8 @@ class OpenDocumentStripper(archive.GenericArchiveStripper):
60 zipout.write(name, item) 64 zipout.write(name, item)
61 mat.secure_remove(name) 65 mat.secure_remove(name)
62 66
63 elif item.endswith('.xml') or item == 'mimetype': 67 elif ext in parser.NOMETA or item == 'mimetype':
64 #keep .xml files, and the "manifest" file 68 #keep NOMETA files, and the "manifest" file
65 if item != 'meta.xml': # contains the metadata 69 if item != 'meta.xml': # contains the metadata
66 zipin.extract(item, self.tempdir) 70 zipin.extract(item, self.tempdir)
67 zipout.write(name, item) 71 zipout.write(name, item)
@@ -98,14 +102,14 @@ class OpenDocumentStripper(archive.GenericArchiveStripper):
98 zipin = zipfile.ZipFile(self.filename, 'r') 102 zipin = zipfile.ZipFile(self.filename, 'r')
99 try: 103 try:
100 zipin.getinfo('meta.xml') 104 zipin.getinfo('meta.xml')
101 return False
102 except KeyError: # no meta.xml in the file 105 except KeyError: # no meta.xml in the file
103 zipin.close()
104 czf = archive.ZipStripper(self.filename, self.parser, 106 czf = archive.ZipStripper(self.filename, self.parser,
105 'application/zip', self.backup, self.add2archive) 107 'application/zip', self.backup, self.add2archive)
106 if not czf.is_clean(): 108 if czf.is_clean():
107 return False 109 zipin.close()
108 return True 110 return True
111 zipin.close()
112 return False
109 113
110 114
111class PdfStripper(parser.GenericParser): 115class PdfStripper(parser.GenericParser):
@@ -129,8 +133,7 @@ class PdfStripper(parser.GenericParser):
129 if key == 'creation-date' or key == 'mod-date': 133 if key == 'creation-date' or key == 'mod-date':
130 if self.document.get_property(key) != -1: 134 if self.document.get_property(key) != -1:
131 return False 135 return False
132 else: 136 elif self.document.get_property(key) is not None and \
133 if self.document.get_property(key) is not None and \
134 self.document.get_property(key) != '': 137 self.document.get_property(key) != '':
135 return False 138 return False
136 return True 139 return True
@@ -174,9 +177,8 @@ class PdfStripper(parser.GenericParser):
174 #creation and modification are set to -1 177 #creation and modification are set to -1
175 if self.document.get_property(key) != -1: 178 if self.document.get_property(key) != -1:
176 metadata[key] = self.document.get_property(key) 179 metadata[key] = self.document.get_property(key)
177 else: 180 elif self.document.get_property(key) is not None and \
178 if self.document.get_property(key) is not None and \ 181 self.document.get_property(key) != '':
179 self.document.get_property(key) != '':
180 metadata[key] = self.document.get_property(key) 182 metadata[key] = self.document.get_property(key)
181 return metadata 183 return metadata
182 184
@@ -209,7 +211,7 @@ class OpenXmlStripper(archive.GenericArchiveStripper):
209 mat.secure_remove(name) 211 mat.secure_remove(name)
210 else: 212 else:
211 zipin.extract(item, self.tempdir) 213 zipin.extract(item, self.tempdir)
212 if os.path.isfile(name): 214 if os.path.isfile(name): # don't care about folders
213 try: 215 try:
214 cfile = mat.create_class_file(name, False, 216 cfile = mat.create_class_file(name, False,
215 self.add2archive) 217 self.add2archive)