diff options
| author | jvoisin | 2011-10-26 00:14:00 +0200 |
|---|---|---|
| committer | jvoisin | 2011-10-26 00:14:00 +0200 |
| commit | a88071264391211017a470e0fd6f06dda67760b4 (patch) | |
| tree | 517b8a9997a66324b58ed8866cb3f518da92406a | |
| parent | 77c8885fb7b6499889a757ef1be6d881031cff0c (diff) | |
Revert a stupid commit, and improve pdf processing
| -rw-r--r-- | mat/exiftool.py | 4 | ||||
| -rw-r--r-- | mat/office.py | 39 |
2 files changed, 25 insertions, 18 deletions
diff --git a/mat/exiftool.py b/mat/exiftool.py
index 18d603e..5a4ecc9 100644
--- a/mat/exiftool.py
+++ b/mat/exiftool.py
| @@ -31,13 +31,13 @@ class ExiftoolStripper(parser.GenericParser): | |||
| 31 | Remove all metadata with help of exiftool | 31 | Remove all metadata with help of exiftool |
| 32 | ''' | 32 | ''' |
| 33 | if self.backup: | 33 | if self.backup: |
| 34 | process = subprocess.Popen(['exiftool', '-All', | 34 | process = subprocess.Popen(['exiftool', '-All=', |
| 35 | '-out', self.output, self.filename], | 35 | '-out', self.output, self.filename], |
| 36 | stdout=open('/dev/null')) | 36 | stdout=open('/dev/null')) |
| 37 | process.wait() | 37 | process.wait() |
| 38 | else: | 38 | else: |
| 39 | process = subprocess.Popen(['exiftool', '-overwrite_original', | 39 | process = subprocess.Popen(['exiftool', '-overwrite_original', |
| 40 | '-All', self.filename], stdout=open('/dev/null')) | 40 | '-All=', self.filename], stdout=open('/dev/null')) |
| 41 | process.wait() | 41 | process.wait() |
| 42 | 42 | ||
| 43 | def is_clean(self): | 43 | def is_clean(self): |
diff --git a/mat/office.py b/mat/office.py
index 30b1669..c4b6fa8 100644
--- a/mat/office.py
+++ b/mat/office.py
| @@ -167,31 +167,38 @@ class PdfStripper(parser.GenericParser): | |||
| 167 | from a pdf file, using exiftool, | 167 | from a pdf file, using exiftool, |
| 168 | of pdfrw if exiftool is not installed | 168 | of pdfrw if exiftool is not installed |
| 169 | ''' | 169 | ''' |
| 170 | try: | 170 | processed = False |
| 171 | try: # try with pdfrw | ||
| 172 | import pdfrw | ||
| 173 | #For now, poppler cannot write meta, so we must use pdfrw | ||
| 174 | logging.debug('Removing %s\'s superficial metadata' % self.filename) | ||
| 175 | trailer = pdfrw.PdfReader(self.output) | ||
| 176 | trailer.Info.Producer = trailer.Info.Creator = None | ||
| 177 | writer = pdfrw.PdfWriter() | ||
| 178 | writer.trailer = trailer | ||
| 179 | writer.write(self.output) | ||
| 180 | self.do_backup() | ||
| 181 | processed = True | ||
| 182 | except: | ||
| 183 | pass | ||
| 184 | |||
| 185 | try: # try with exiftool | ||
| 171 | import exiftool | 186 | import exiftool |
| 172 | if self.backup: | 187 | if self.backup: |
| 173 | process = subprocess.Popen(['exiftool', '-All', | 188 | process = subprocess.Popen(['exiftool', '-All=', |
| 174 | '-out', self.output, self.filename], | 189 | '-out', self.output, self.filename], |
| 175 | stdout=open('/dev/null')) | 190 | stdout=open('/dev/null')) |
| 176 | process.wait() | 191 | process.wait() |
| 177 | else: | 192 | else: |
| 178 | process = subprocess.Popen(['exiftool', '-overwrite_original', | 193 | process = subprocess.Popen(['exiftool', '-overwrite_original', |
| 179 | '-All', self.filename], stdout=open('/dev/null')) | 194 | '-All=', self.filename], stdout=open('/dev/null')) |
| 180 | process.wait() | 195 | process.wait() |
| 196 | processed = True | ||
| 181 | except: | 197 | except: |
| 182 | try: | 198 | pass |
| 183 | import pdfrw | 199 | |
| 184 | #For now, poppler cannot write meta, so we must use pdfrw | 200 | if processed == False: |
| 185 | logging.debug('Removing %s\'s superficial metadata' % self.filename) | 201 | logging.error('Please install either pdfrw, or exiftool') |
| 186 | trailer = pdfrw.PdfReader(self.output) | ||
| 187 | trailer.Info.Producer = trailer.Info.Creator = None | ||
| 188 | writer = pdfrw.PdfWriter() | ||
| 189 | writer.trailer = trailer | ||
| 190 | writer.write(self.output) | ||
| 191 | self.do_backup() | ||
| 192 | except: | ||
| 193 | logging.error('You don\'t have either python-pdfrw, or\ | ||
| 194 | exiftool: processed pdf are not totally clean !') | ||
| 195 | 202 | ||
| 196 | def get_meta(self): | 203 | def get_meta(self): |
| 197 | ''' | 204 | ''' |
