From b0b30b8ed51d6de3671c0b133edd6f7f3230282b Mon Sep 17 00:00:00 2001 From: jvoisin Date: Wed, 19 Dec 2012 21:17:02 +0100 Subject: Preliminary support of produced pdf quality choice --- MAT/archive.py | 25 +++++++++++-------------- MAT/exiftool.py | 5 ++--- MAT/mat.py | 7 +++++-- MAT/misc.py | 5 ++--- MAT/office.py | 18 +++++++++++------- MAT/parser.py | 2 +- mat | 4 +++- mat-gui | 19 ++++++++++++++----- 8 files changed, 49 insertions(+), 36 deletions(-) diff --git a/MAT/archive.py b/MAT/archive.py index f78a2a7..69c8f1b 100644 --- a/MAT/archive.py +++ b/MAT/archive.py @@ -17,11 +17,10 @@ class GenericArchiveStripper(parser.GenericParser): ''' Represent a generic archive ''' - def __init__(self, filename, parser, mime, backup, add2archive): - super(GenericArchiveStripper, self).__init__(filename, parser, mime, - backup, add2archive) + def __init__(self, filename, parser, mime, backup, **kwargs): + super(GenericArchiveStripper, self).__init__(filename, parser, mime, backup, **kwargs) self.compression = '' - self.add2archive = add2archive + self.add2archive = kwargs['add2archive'] self.tempdir = tempfile.mkdtemp() def __del__(self): @@ -81,7 +80,7 @@ class ZipStripper(GenericArchiveStripper): if os.path.isfile(name): try: cfile = mat.create_class_file(name, False, - self.add2archive) + add2archive=self.add2archive) if not cfile.is_clean(): return False except: @@ -129,7 +128,7 @@ harmless format' % item.filename) if os.path.isfile(name): try: cfile = mat.create_class_file(name, False, - self.add2archive) + add2archive=self.add2archive) cfile.remove_all() logging.debug('Processing %s from %s' % (item.filename, self.filename)) @@ -173,7 +172,7 @@ class TarStripper(GenericArchiveStripper): #no backup file try: cfile = mat.create_class_file(name, False, - self.add2archive) + add2archive=self.add2archive) cfile.remove_all() tarout.add(name, item.name, filter=self._remove) except: @@ -218,7 +217,7 @@ class TarStripper(GenericArchiveStripper): if item.type == '0': # is
item a regular file ? try: class_file = mat.create_class_file(name, - False, self.add2archive) # no backup file + False, add2archive=self.add2archive) # no backup file if not class_file.is_clean(): tarin.close() return False @@ -256,9 +255,8 @@ class GzipStripper(TarStripper): ''' Represent a tar.gz archive ''' - def __init__(self, filename, parser, mime, backup, add2archive): - super(GzipStripper, self).__init__(filename, parser, mime, backup, - add2archive) + def __init__(self, filename, parser, mime, backup, **kwargs): + super(GzipStripper, self).__init__(filename, parser, mime, backup, **kwargs) self.compression = ':gz' @@ -266,7 +264,6 @@ class Bzip2Stripper(TarStripper): ''' Represents a tar.bz2 archive ''' - def __init__(self, filename, parser, mime, backup, add2archive): - super(Bzip2Stripper, self).__init__(filename, parser, mime, backup, - add2archive) + def __init__(self, filename, parser, mime, backup, **kwargs): + super(Bzip2Stripper, self).__init__(filename, parser, mime, backup, **kwargs) self.compression = ':bz2' diff --git a/MAT/exiftool.py b/MAT/exiftool.py index 758a094..eeefcb8 100644 --- a/MAT/exiftool.py +++ b/MAT/exiftool.py @@ -11,9 +11,8 @@ class ExiftoolStripper(parser.GenericParser): A generic stripper class using exiftool as backend ''' - def __init__(self, filename, parser, mime, backup, add2archive): - super(ExiftoolStripper, self).__init__(filename, parser, mime, - backup, add2archive) + def __init__(self, filename, parser, mime, backup, **kwargs): + super(ExiftoolStripper, self).__init__(filename, parser, mime, backup, **kwargs) self.allowed = ['ExifTool Version Number', 'File Name', 'Directory', 'File Size', 'File Modification Date/Time', 'File Permissions', 'File Type', 'MIME Type', 'Image Width', 'Image Height', diff --git a/MAT/mat.py b/MAT/mat.py index 4c12333..84f26f2 100644 --- a/MAT/mat.py +++ b/MAT/mat.py @@ -102,7 +102,7 @@ def secure_remove(filename): logging.error('Unable to remove %s' % filename) -def 
create_class_file(name, backup, add2archive): +def create_class_file(name, backup, **kwargs): ''' return a $FILETYPEStripper() class, corresponding to the filetype of the given file @@ -149,4 +149,7 @@ def create_class_file(name, backup, add2archive): logging.info('Don\'t have stripper for %s format' % mime) return None - return stripper_class(filename, parser, mime, backup, add2archive) + if mime.endswith('pdf') and mime.startswith('application/'): + return stripper_class(filename, parser, mime, backup, **kwargs) + + return stripper_class(filename, parser, mime, backup, **kwargs) diff --git a/MAT/misc.py b/MAT/misc.py index d084861..1ffc327 100644 --- a/MAT/misc.py +++ b/MAT/misc.py @@ -12,9 +12,8 @@ class TorrentStripper(parser.GenericParser): Represent a torrent file with the help of the bencode lib from Petru Paler ''' - def __init__(self, filename, parser, mime, backup, add2archive): - super(TorrentStripper, self).__init__(filename, parser, mime, - backup, add2archive) + def __init__(self, filename, parser, mime, backup, **kwargs): + super(TorrentStripper, self).__init__(filename, parser, mime, backup) self.fields = ['comment', 'creation date', 'created by'] def is_clean(self): diff --git a/MAT/office.py b/MAT/office.py index d14125b..190a6d4 100644 --- a/MAT/office.py +++ b/MAT/office.py @@ -107,8 +107,9 @@ class OpenDocumentStripper(archive.GenericArchiveStripper): try: zipin.getinfo('meta.xml') except KeyError: # no meta.xml in the file + kwargs = {'backup':self.backup, 'add2archive':self.add2archive} czf = archive.ZipStripper(self.filename, self.parser, - 'application/zip', self.backup, self.add2archive) + 'application/zip', **kwargs) if czf.is_clean(): zipin.close() return True @@ -120,11 +121,11 @@ class PdfStripper(parser.GenericParser): ''' Represent a PDF file ''' - def __init__(self, filename, parser, mime, backup, add2archive): - super(PdfStripper, self).__init__(filename, parser, mime, backup, - add2archive) + def __init__(self, filename, parser, 
mime, backup, **kwargs): + super(PdfStripper, self).__init__(filename, parser, mime, backup, **kwargs) uri = 'file://' + os.path.abspath(self.filename) self.password = None + self.quality = kwargs['low_pdf_quality'] self.document = poppler.document_new_from_file(uri, self.password) self.meta_list = frozenset(['title', 'author', 'subject', 'keywords', 'creator', 'producer', 'metadata']) @@ -161,7 +162,10 @@ class PdfStripper(parser.GenericParser): for pagenum in xrange(self.document.get_n_pages()): page = self.document.get_page(pagenum) context.translate(0, 0) - page.render_for_printing(context) # render the page on context + if self.quality: + page.render(context) # render the page on context + else: + page.render_for_printing(context) # render the page on context context.show_page() # draw context on surface surface.finish() @@ -175,7 +179,6 @@ class PdfStripper(parser.GenericParser): writer.trailer = trailer writer.write(self.output) self.do_backup() - return True except: print('Unable to remove all metadata from %s, please install\ pdfrw' % self.output) @@ -248,8 +251,9 @@ class OpenXmlStripper(archive.GenericArchiveStripper): if item.startswith('docProps/'): return False zipin.close() + kwargs = {'backup':self.backup, 'add2archive':self.add2archive} czf = archive.ZipStripper(self.filename, self.parser, - 'application/zip', self.backup, self.add2archive) + 'application/zip', **kwargs) return czf.is_clean() def get_meta(self): diff --git a/MAT/parser.py b/MAT/parser.py index 4427b01..6be2b03 100644 --- a/MAT/parser.py +++ b/MAT/parser.py @@ -23,7 +23,7 @@ class GenericParser(object): ''' Parent class of all parsers ''' - def __init__(self, filename, parser, mime, backup, add2archive): + def __init__(self, filename, parser, mime, backup, **kwargs): self.filename = '' self.parser = parser self.mime = mime diff --git a/mat b/mat index af7421d..5cd61ee 100755 --- a/mat +++ b/mat @@ -26,6 +26,8 @@ The default behaviour is to clean files given in argument') help='Keep 
a backup copy') options.add_option('--force', '-f', action='store_true', default=False, help='Don\'t check if files are clean before cleaning') + options.add_option('--low-pdf-quality', '-L', action='store_true', default=False, + help='Produces a lighter, but lower quality PDF') info = optparse.OptionGroup(parser, 'Informations') info.add_option('--check', '-c', action='store_true', default=False, @@ -135,7 +137,7 @@ def main(): for filename in filenames: class_file = mat.create_class_file(filename, args.backup, - args.add2archive) + add2archive=args.add2archive, low_pdf_quality=args.low_pdf_quality) if class_file: func(class_file, filename, args.force) else: diff --git a/mat-gui b/mat-gui index 05bde66..9b1a84f 100755 --- a/mat-gui +++ b/mat-gui @@ -30,9 +30,9 @@ class CFile(object): This class exist just to be "around" my parser.Generic_parser class, since the gtk.ListStore does not accept it. ''' - def __init__(self, filename, backup, add2archive): + def __init__(self, filename, backup, **kwargs): try: - self.file = mat.create_class_file(filename, backup, add2archive) + self.file = mat.create_class_file(filename, backup, **kwargs) except: self.file = None @@ -46,6 +46,7 @@ class GUI: self.force = False self.backup = True self.add2archive = True + self.pdf_quality = False # Main window self.window = gtk.Window() @@ -290,7 +291,7 @@ class GUI: # if filename does not exist return False - cf = CFile(filename, self.backup, self.add2archive) + cf = CFile(filename, self.backup, add2archive=self.add2archive, low_pdf_quality=self.pdf_quality) if cf.file: # if the file is supported by the mat self.liststore.append([cf, os.path.dirname(cf.file.filename) + os.path.sep, cf.file.basename, cf.file.mime, _('unknow'), 'None']) @@ -469,17 +470,23 @@ cleaning')) backup.set_tooltip_text(_('Keep a backup copy')) table.attach(backup, 0, 1, 1, 2) + pdf_quality = gtk.CheckButton(_('Reduce PDF quality'), False) + pdf_quality.set_active(self.pdf_quality) + pdf_quality.connect('toggled', 
self.__invert, 'pdf_quality') + pdf_quality.set_tooltip_text(_('Reduce the produced PDF size and quality')) + table.attach(pdf_quality, 0, 1, 2, 3) + add2archive = gtk.CheckButton(_('Add unsupported file to archives'), False) add2archive.set_active(self.add2archive) add2archive.connect('toggled', self.__invert, 'add2archive') add2archive.set_tooltip_text(_('Add non-supported (and so \ non-anonymised) file to output archive')) - table.attach(add2archive, 0, 1, 2, 3) + table.attach(add2archive, 0, 1, 3, 4) hbox.show_all() response = dialog.run() - if response is 0: # gtk.STOCK_OK + if response == 0: # gtk.STOCK_OK dialog.destroy() def __invert(self, button, name): @@ -494,6 +501,8 @@ non-anonymised) file to output archive')) # change the "backup" property of all files self.liststore[line][0].file.backup = self.backup self.treeview.get_column(4).set_visible(self.backup) + elif name == 'pdf_quality': + self.pdf_quality = not self.pdf_quality elif name == 'add2archive': self.add2archive = not self.add2archive -- cgit v1.3