From a88071264391211017a470e0fd6f06dda67760b4 Mon Sep 17 00:00:00 2001 From: jvoisin Date: Wed, 26 Oct 2011 00:14:00 +0200 Subject: Revert a stupid commit, and improve pdf processing --- mat/exiftool.py | 4 ++-- mat/office.py | 39 +++++++++++++++++++++++---------------- 2 files changed, 25 insertions(+), 18 deletions(-) diff --git a/mat/exiftool.py b/mat/exiftool.py index 18d603e..5a4ecc9 100644 --- a/mat/exiftool.py +++ b/mat/exiftool.py @@ -31,13 +31,13 @@ class ExiftoolStripper(parser.GenericParser): Remove all metadata with help of exiftool ''' if self.backup: - process = subprocess.Popen(['exiftool', '-All', + process = subprocess.Popen(['exiftool', '-All=', '-out', self.output, self.filename], stdout=open('/dev/null')) process.wait() else: process = subprocess.Popen(['exiftool', '-overwrite_original', - '-All', self.filename], stdout=open('/dev/null')) + '-All=', self.filename], stdout=open('/dev/null')) process.wait() def is_clean(self): diff --git a/mat/office.py b/mat/office.py index 30b1669..c4b6fa8 100644 --- a/mat/office.py +++ b/mat/office.py @@ -167,31 +167,38 @@ class PdfStripper(parser.GenericParser): from a pdf file, using exiftool, of pdfrw if exiftool is not installed ''' - try: + processed = False + try: # try with pdfrw + import pdfrw + #For now, poppler cannot write meta, so we must use pdfrw + logging.debug('Removing %s\'s superficial metadata' % self.filename) + trailer = pdfrw.PdfReader(self.output) + trailer.Info.Producer = trailer.Info.Creator = None + writer = pdfrw.PdfWriter() + writer.trailer = trailer + writer.write(self.output) + self.do_backup() + processed = True + except: + pass + + try: # try with exiftool import exiftool if self.backup: - process = subprocess.Popen(['exiftool', '-All', + process = subprocess.Popen(['exiftool', '-All=', '-out', self.output, self.filename], stdout=open('/dev/null')) process.wait() else: process = subprocess.Popen(['exiftool', '-overwrite_original', - '-All', self.filename], stdout=open('/dev/null')) + '-All=', self.filename], stdout=open('/dev/null')) process.wait() + processed = True except: - try: - import pdfrw - #For now, poppler cannot write meta, so we must use pdfrw - logging.debug('Removing %s\'s superficial metadata' % self.filename) - trailer = pdfrw.PdfReader(self.output) - trailer.Info.Producer = trailer.Info.Creator = None - writer = pdfrw.PdfWriter() - writer.trailer = trailer - writer.write(self.output) - self.do_backup() - except: - logging.error('You don\'t have either python-pdfrw, or\ - exiftool: processed pdf are not totally clean !') + pass + + if processed == False: + logging.error('Please install either pdfrw, or exiftool') def get_meta(self): ''' -- cgit v1.3