summary | refs | log | tree | commit | diff
path: root/lib
diff options
context:
space:
mode:
author jvoisin 2011-08-05 11:27:23 +0200
committer jvoisin 2011-08-05 11:27:23 +0200
commit 943b0faf21883089827d976c48a231293c091640 (patch)
tree a895f71e572b84aaa60dfa23636129db71de93f4 /lib
parent 5a3661d496dc8071f856242211e1a826f4d54c10 (diff)
More robust implementation of is_clean
Diffstat (limited to 'lib')
-rw-r--r-- lib/office.py 12
1 files changed, 11 insertions, 1 deletions
diff --git a/lib/office.py b/lib/office.py
index b07c1e7..6fdcf2d 100644
--- a/lib/office.py
+++ b/lib/office.py
@@ -139,6 +139,7 @@ class PdfStripper(parser.GenericParser):
139 ''' 139 '''
140 Opening the pdf with poppler, then doing a render 140 Opening the pdf with poppler, then doing a render
141 on a cairo pdfsurface for each pages. 141 on a cairo pdfsurface for each pages.
142 Thanks to Lunar^for the idea.
142 http://cairographics.org/documentation/pycairo/2/ 143 http://cairographics.org/documentation/pycairo/2/
143 python-poppler is not documented at all : have fun ;) 144 python-poppler is not documented at all : have fun ;)
144 ''' 145 '''
@@ -188,11 +189,20 @@ class OpenXmlStripper(archive.GenericArchiveStripper):
188 (I don't like this format.) 189 (I don't like this format.)
189 ''' 190 '''
190 def is_clean(self): 191 def is_clean(self):
192 '''
193 Check if the file is clean from harmful metadatas
194 '''
191 zipin = zipfile.ZipFile(self.filename, 'r') 195 zipin = zipfile.ZipFile(self.filename, 'r')
192 for item in zipin.namelist(): 196 for item in zipin.namelist():
193 if item.startswith('docProps/'): 197 if item.startswith('docProps/'):
194 return False 198 return False
195 return True 199 zipin.close()
200 czf = archive.ZipStripper(self.filename, self.parser,
201 'application/zip', self.backup, self.add2archive)
202 if not czf.is_clean():
203 return False
204 else:
205 return True
196 206
197 def get_meta(self): 207 def get_meta(self):
198 ''' 208 '''