diff options
| author | jvoisin | 2012-02-06 02:05:05 +0100 |
|---|---|---|
| committer | jvoisin | 2012-02-06 02:05:05 +0100 |
| commit | 2cba152e7c00ff2c422d5e1c911f17ea07f346ed (patch) | |
| tree | e83a362b8f49f72b0457af7fd566ea37f9815b14 /lib/office.py | |
| parent | c71999c4f789beb8812f9570926f894ac9f1938e (diff) | |
Merge the two processing modes into a single one
Diffstat (limited to 'lib/office.py')
| -rw-r--r-- | lib/office.py | 68 |
1 files changed, 15 insertions, 53 deletions
diff --git a/lib/office.py b/lib/office.py index e1d738e..82b817e 100644 --- a/lib/office.py +++ b/lib/office.py | |||
| @@ -49,7 +49,7 @@ class OpenDocumentStripper(archive.GenericArchiveStripper): | |||
| 49 | logging.debug('%s has no opendocument metadata' % self.filename) | 49 | logging.debug('%s has no opendocument metadata' % self.filename) |
| 50 | return metadata | 50 | return metadata |
| 51 | 51 | ||
| 52 | def _remove_all(self, method): | 52 | def _remove_all(self): |
| 53 | ''' | 53 | ''' |
| 54 | FIXME ? | 54 | FIXME ? |
| 55 | There is a patch implementing the Zipfile.remove() | 55 | There is a patch implementing the Zipfile.remove() |
| @@ -84,10 +84,7 @@ class OpenDocumentStripper(archive.GenericArchiveStripper): | |||
| 84 | try: | 84 | try: |
| 85 | cfile = mat.create_class_file(name, False, | 85 | cfile = mat.create_class_file(name, False, |
| 86 | self.add2archive) | 86 | self.add2archive) |
| 87 | if method == 'normal': | 87 | cfile.remove_all() |
| 88 | cfile.remove_all() | ||
| 89 | else: | ||
| 90 | cfile.remove_all_strict() | ||
| 91 | logging.debug('Processing %s from %s' % (item, | 88 | logging.debug('Processing %s from %s' % (item, |
| 92 | self.filename)) | 89 | self.filename)) |
| 93 | zipout.write(name, item) | 90 | zipout.write(name, item) |
| @@ -137,20 +134,17 @@ class PdfStripper(parser.GenericParser): | |||
| 137 | Check if the file is clean from harmful metadatas | 134 | Check if the file is clean from harmful metadatas |
| 138 | ''' | 135 | ''' |
| 139 | for key in self.meta_list: | 136 | for key in self.meta_list: |
| 140 | if self.document.get_property(key) is not None and \ | 137 | if self.document.get_property(key) != None: |
| 141 | self.document.get_property(key) != '': | ||
| 142 | return False | 138 | return False |
| 143 | return True | 139 | return True |
| 144 | 140 | ||
| 145 | |||
| 146 | def remove_all(self): | 141 | def remove_all(self): |
| 147 | ''' | 142 | ''' |
| 148 | Remove supperficial | 143 | Remove supperficial |
| 149 | ''' | 144 | ''' |
| 150 | return self._remove_meta() | 145 | return self._remove_meta() |
| 151 | 146 | ||
| 152 | 147 | def _remove_meta(self): | |
| 153 | def remove_all_strict(self): | ||
| 154 | ''' | 148 | ''' |
| 155 | Opening the PDF with poppler, then doing a render | 149 | Opening the PDF with poppler, then doing a render |
| 156 | on a cairo pdfsurface for each pages. | 150 | on a cairo pdfsurface for each pages. |
| @@ -166,54 +160,26 @@ class PdfStripper(parser.GenericParser): | |||
| 166 | for pagenum in xrange(self.document.get_n_pages()): | 160 | for pagenum in xrange(self.document.get_n_pages()): |
| 167 | page = self.document.get_page(pagenum) | 161 | page = self.document.get_page(pagenum) |
| 168 | context.translate(0, 0) | 162 | context.translate(0, 0) |
| 169 | page.render(context) # render the page on context | 163 | page.render_for_printing(context) # render the page on context |
| 170 | context.show_page() # draw context on surface | 164 | context.show_page() # draw context on surface |
| 171 | surface.finish() | 165 | surface.finish() |
| 172 | return self._remove_meta() | ||
| 173 | 166 | ||
| 174 | def _remove_meta(self): | 167 | try: |
| 175 | ''' | ||
| 176 | Remove superficial/external metadata | ||
| 177 | from a PDF file, using exiftool, | ||
| 178 | of pdfrw if exiftool is not installed | ||
| 179 | ''' | ||
| 180 | processed = False | ||
| 181 | try:# try with pdfrw | ||
| 182 | import pdfrw | 168 | import pdfrw |
| 183 | #For now, poppler cannot write meta, so we must use pdfrw | 169 | #For now, poppler cannot write meta, so we must use pdfrw |
| 184 | logging.debug('Removing %s\'s superficial metadata' % self.filename) | 170 | logging.debug('Removing %s\'s superficial metadata' % self.filename) |
| 185 | trailer = pdfrw.PdfReader(self.output) | 171 | trailer = pdfrw.PdfReader(self.output) |
| 186 | trailer.Info.Producer = trailer.Author = trailer.Info.Creator = None | 172 | trailer.Info.Producer = None |
| 173 | trailer.Info.Creator = None | ||
| 187 | writer = pdfrw.PdfWriter() | 174 | writer = pdfrw.PdfWriter() |
| 188 | writer.trailer = trailer | 175 | writer.trailer = trailer |
| 189 | writer.write(self.output) | 176 | writer.write(self.output) |
| 190 | self.do_backup() | 177 | self.do_backup() |
| 191 | processed = True | 178 | return True |
| 192 | except: | ||
| 193 | pass | ||
| 194 | |||
| 195 | try: # try with exiftool | ||
| 196 | subprocess.Popen('exiftool', stdout=open('/dev/null')) | ||
| 197 | import exiftool | ||
| 198 | # Note: '-All=' must be followed by a known exiftool option. | ||
| 199 | if self.backup: | ||
| 200 | process = subprocess.Popen(['exiftool', '-m', '-All=', | ||
| 201 | '-out', self.output, self.filename], stdout=open('/dev/null')) | ||
| 202 | process.wait() | ||
| 203 | else: | ||
| 204 | # Note: '-All=' must be followed by a known exiftool option. | ||
| 205 | process = subprocess.Popen( | ||
| 206 | ['exiftool', '-All=', '-overwrite_original', self.filename], | ||
| 207 | stdout=open('/dev/null')) | ||
| 208 | process.wait() | ||
| 209 | processed = True | ||
| 210 | except: | 179 | except: |
| 211 | pass | 180 | print('Unable to remove all metadata from %s, please install\ |
| 212 | 181 | pdfrw' % self.output) | |
| 213 | if processed is False: | 182 | return False |
| 214 | logging.error('Please install either pdfrw, or exiftool to\ | ||
| 215 | fully handle PDF files') | ||
| 216 | return processed | ||
| 217 | 183 | ||
| 218 | def get_meta(self): | 184 | def get_meta(self): |
| 219 | ''' | 185 | ''' |
| @@ -221,8 +187,7 @@ class PdfStripper(parser.GenericParser): | |||
| 221 | ''' | 187 | ''' |
| 222 | metadata = {} | 188 | metadata = {} |
| 223 | for key in self.meta_list: | 189 | for key in self.meta_list: |
| 224 | if self.document.get_property(key) is not None and \ | 190 | if self.document.get_property(key) is not None: |
| 225 | self.document.get_property(key) != '': | ||
| 226 | metadata[key] = self.document.get_property(key) | 191 | metadata[key] = self.document.get_property(key) |
| 227 | return metadata | 192 | return metadata |
| 228 | 193 | ||
| @@ -234,7 +199,7 @@ class OpenXmlStripper(archive.GenericArchiveStripper): | |||
| 234 | It contains mostly xml, but can have media blobs, crap, ... | 199 | It contains mostly xml, but can have media blobs, crap, ... |
| 235 | (I don't like this format.) | 200 | (I don't like this format.) |
| 236 | ''' | 201 | ''' |
| 237 | def _remove_all(self, method): | 202 | def _remove_all(self): |
| 238 | ''' | 203 | ''' |
| 239 | FIXME ? | 204 | FIXME ? |
| 240 | There is a patch implementing the Zipfile.remove() | 205 | There is a patch implementing the Zipfile.remove() |
| @@ -258,10 +223,7 @@ class OpenXmlStripper(archive.GenericArchiveStripper): | |||
| 258 | try: | 223 | try: |
| 259 | cfile = mat.create_class_file(name, False, | 224 | cfile = mat.create_class_file(name, False, |
| 260 | self.add2archive) | 225 | self.add2archive) |
| 261 | if method == 'normal': | 226 | cfile.remove_all() |
| 262 | cfile.remove_all() | ||
| 263 | else: | ||
| 264 | cfile.remove_all_strict() | ||
| 265 | logging.debug('Processing %s from %s' % (item, | 227 | logging.debug('Processing %s from %s' % (item, |
| 266 | self.filename)) | 228 | self.filename)) |
| 267 | zipout.write(name, item) | 229 | zipout.write(name, item) |
