summary | refs | log | tree | commit | diff
path: root/lib/office.py
diff options
context:
space:
mode:
Diffstat (limited to 'lib/office.py')
-rw-r--r-- lib/office.py 68
1 files changed, 15 insertions, 53 deletions
diff --git a/lib/office.py b/lib/office.py
index e1d738e..82b817e 100644
--- a/lib/office.py
+++ b/lib/office.py
@@ -49,7 +49,7 @@ class OpenDocumentStripper(archive.GenericArchiveStripper):
49 logging.debug('%s has no opendocument metadata' % self.filename) 49 logging.debug('%s has no opendocument metadata' % self.filename)
50 return metadata 50 return metadata
51 51
52 def _remove_all(self, method): 52 def _remove_all(self):
53 ''' 53 '''
54 FIXME ? 54 FIXME ?
55 There is a patch implementing the Zipfile.remove() 55 There is a patch implementing the Zipfile.remove()
@@ -84,10 +84,7 @@ class OpenDocumentStripper(archive.GenericArchiveStripper):
84 try: 84 try:
85 cfile = mat.create_class_file(name, False, 85 cfile = mat.create_class_file(name, False,
86 self.add2archive) 86 self.add2archive)
87 if method == 'normal': 87 cfile.remove_all()
88 cfile.remove_all()
89 else:
90 cfile.remove_all_strict()
91 logging.debug('Processing %s from %s' % (item, 88 logging.debug('Processing %s from %s' % (item,
92 self.filename)) 89 self.filename))
93 zipout.write(name, item) 90 zipout.write(name, item)
@@ -137,20 +134,17 @@ class PdfStripper(parser.GenericParser):
137 Check if the file is clean from harmful metadatas 134 Check if the file is clean from harmful metadatas
138 ''' 135 '''
139 for key in self.meta_list: 136 for key in self.meta_list:
140 if self.document.get_property(key) is not None and \ 137 if self.document.get_property(key) != None:
141 self.document.get_property(key) != '':
142 return False 138 return False
143 return True 139 return True
144 140
145
146 def remove_all(self): 141 def remove_all(self):
147 ''' 142 '''
148 Remove supperficial 143 Remove supperficial
149 ''' 144 '''
150 return self._remove_meta() 145 return self._remove_meta()
151 146
152 147 def _remove_meta(self):
153 def remove_all_strict(self):
154 ''' 148 '''
155 Opening the PDF with poppler, then doing a render 149 Opening the PDF with poppler, then doing a render
156 on a cairo pdfsurface for each pages. 150 on a cairo pdfsurface for each pages.
@@ -166,54 +160,26 @@ class PdfStripper(parser.GenericParser):
166 for pagenum in xrange(self.document.get_n_pages()): 160 for pagenum in xrange(self.document.get_n_pages()):
167 page = self.document.get_page(pagenum) 161 page = self.document.get_page(pagenum)
168 context.translate(0, 0) 162 context.translate(0, 0)
169 page.render(context) # render the page on context 163 page.render_for_printing(context) # render the page on context
170 context.show_page() # draw context on surface 164 context.show_page() # draw context on surface
171 surface.finish() 165 surface.finish()
172 return self._remove_meta()
173 166
174 def _remove_meta(self): 167 try:
175 '''
176 Remove superficial/external metadata
177 from a PDF file, using exiftool,
178 of pdfrw if exiftool is not installed
179 '''
180 processed = False
181 try:# try with pdfrw
182 import pdfrw 168 import pdfrw
183 #For now, poppler cannot write meta, so we must use pdfrw 169 #For now, poppler cannot write meta, so we must use pdfrw
184 logging.debug('Removing %s\'s superficial metadata' % self.filename) 170 logging.debug('Removing %s\'s superficial metadata' % self.filename)
185 trailer = pdfrw.PdfReader(self.output) 171 trailer = pdfrw.PdfReader(self.output)
186 trailer.Info.Producer = trailer.Author = trailer.Info.Creator = None 172 trailer.Info.Producer = None
173 trailer.Info.Creator = None
187 writer = pdfrw.PdfWriter() 174 writer = pdfrw.PdfWriter()
188 writer.trailer = trailer 175 writer.trailer = trailer
189 writer.write(self.output) 176 writer.write(self.output)
190 self.do_backup() 177 self.do_backup()
191 processed = True 178 return True
192 except:
193 pass
194
195 try: # try with exiftool
196 subprocess.Popen('exiftool', stdout=open('/dev/null'))
197 import exiftool
198 # Note: '-All=' must be followed by a known exiftool option.
199 if self.backup:
200 process = subprocess.Popen(['exiftool', '-m', '-All=',
201 '-out', self.output, self.filename], stdout=open('/dev/null'))
202 process.wait()
203 else:
204 # Note: '-All=' must be followed by a known exiftool option.
205 process = subprocess.Popen(
206 ['exiftool', '-All=', '-overwrite_original', self.filename],
207 stdout=open('/dev/null'))
208 process.wait()
209 processed = True
210 except: 179 except:
211 pass 180 print('Unable to remove all metadata from %s, please install\
212 181 pdfrw' % self.output)
213 if processed is False: 182 return False
214 logging.error('Please install either pdfrw, or exiftool to\
215 fully handle PDF files')
216 return processed
217 183
218 def get_meta(self): 184 def get_meta(self):
219 ''' 185 '''
@@ -221,8 +187,7 @@ class PdfStripper(parser.GenericParser):
221 ''' 187 '''
222 metadata = {} 188 metadata = {}
223 for key in self.meta_list: 189 for key in self.meta_list:
224 if self.document.get_property(key) is not None and \ 190 if self.document.get_property(key) is not None:
225 self.document.get_property(key) != '':
226 metadata[key] = self.document.get_property(key) 191 metadata[key] = self.document.get_property(key)
227 return metadata 192 return metadata
228 193
@@ -234,7 +199,7 @@ class OpenXmlStripper(archive.GenericArchiveStripper):
234 It contains mostly xml, but can have media blobs, crap, ... 199 It contains mostly xml, but can have media blobs, crap, ...
235 (I don't like this format.) 200 (I don't like this format.)
236 ''' 201 '''
237 def _remove_all(self, method): 202 def _remove_all(self):
238 ''' 203 '''
239 FIXME ? 204 FIXME ?
240 There is a patch implementing the Zipfile.remove() 205 There is a patch implementing the Zipfile.remove()
@@ -258,10 +223,7 @@ class OpenXmlStripper(archive.GenericArchiveStripper):
258 try: 223 try:
259 cfile = mat.create_class_file(name, False, 224 cfile = mat.create_class_file(name, False,
260 self.add2archive) 225 self.add2archive)
261 if method == 'normal': 226 cfile.remove_all()
262 cfile.remove_all()
263 else:
264 cfile.remove_all_strict()
265 logging.debug('Processing %s from %s' % (item, 227 logging.debug('Processing %s from %s' % (item,
266 self.filename)) 228 self.filename))
267 zipout.write(name, item) 229 zipout.write(name, item)