From b0b30b8ed51d6de3671c0b133edd6f7f3230282b Mon Sep 17 00:00:00 2001 From: jvoisin Date: Wed, 19 Dec 2012 21:17:02 +0100 Subject: Preliminary support of producted pdf quality choice --- MAT/archive.py | 25 +++++++++++-------------- MAT/exiftool.py | 5 ++--- MAT/mat.py | 7 +++++-- MAT/misc.py | 5 ++--- MAT/office.py | 18 +++++++++++------- MAT/parser.py | 2 +- 6 files changed, 32 insertions(+), 30 deletions(-) (limited to 'MAT') diff --git a/MAT/archive.py b/MAT/archive.py index f78a2a7..69c8f1b 100644 --- a/MAT/archive.py +++ b/MAT/archive.py @@ -17,11 +17,10 @@ class GenericArchiveStripper(parser.GenericParser): ''' Represent a generic archive ''' - def __init__(self, filename, parser, mime, backup, add2archive): - super(GenericArchiveStripper, self).__init__(filename, parser, mime, - backup, add2archive) + def __init__(self, filename, parser, mime, backup, **kwargs): + super(GenericArchiveStripper, self).__init__(filename, parser, mime, backup, **kwargs) self.compression = '' - self.add2archive = add2archive + self.add2archive = kwargs['add2archive'] self.tempdir = tempfile.mkdtemp() def __del__(self): @@ -81,7 +80,7 @@ class ZipStripper(GenericArchiveStripper): if os.path.isfile(name): try: cfile = mat.create_class_file(name, False, - self.add2archive) + add2archive=self.add2archive) if not cfile.is_clean(): return False except: @@ -129,7 +128,7 @@ harmless format' % item.filename) if os.path.isfile(name): try: cfile = mat.create_class_file(name, False, - self.add2archive) + add2archive=self.add2archive) cfile.remove_all() logging.debug('Processing %s from %s' % (item.filename, self.filename)) @@ -173,7 +172,7 @@ class TarStripper(GenericArchiveStripper): #no backup file try: cfile = mat.create_class_file(name, False, - self.add2archive) + add2archive=self.add2archive) cfile.remove_all() tarout.add(name, item.name, filter=self._remove) except: @@ -218,7 +217,7 @@ class TarStripper(GenericArchiveStripper): if item.type == '0': # is item a regular file ? try: class_file = mat.create_class_file(name, - False, self.add2archive) # no backup file + False, add2archive=self.add2archive) # no backup file if not class_file.is_clean(): tarin.close() return False @@ -256,9 +255,8 @@ class GzipStripper(TarStripper): ''' Represent a tar.gz archive ''' - def __init__(self, filename, parser, mime, backup, add2archive): - super(GzipStripper, self).__init__(filename, parser, mime, backup, - add2archive) + def __init__(self, filename, parser, mime, backup, **kwargs): + super(GzipStripper, self).__init__(filename, parser, mime, backup, **kwargs) self.compression = ':gz' @@ -266,7 +264,6 @@ class Bzip2Stripper(TarStripper): ''' Represents a tar.bz2 archive ''' - def __init__(self, filename, parser, mime, backup, add2archive): - super(Bzip2Stripper, self).__init__(filename, parser, mime, backup, - add2archive) + def __init__(self, filename, parser, mime, backup, **kwargs): + super(Bzip2Stripper, self).__init__(filename, parser, mime, backup, **kwargs) self.compression = ':bz2' diff --git a/MAT/exiftool.py b/MAT/exiftool.py index 758a094..eeefcb8 100644 --- a/MAT/exiftool.py +++ b/MAT/exiftool.py @@ -11,9 +11,8 @@ class ExiftoolStripper(parser.GenericParser): A generic stripper class using exiftool as backend ''' - def __init__(self, filename, parser, mime, backup, add2archive): - super(ExiftoolStripper, self).__init__(filename, parser, mime, - backup, add2archive) + def __init__(self, filename, parser, mime, backup, **kwargs): + super(ExiftoolStripper, self).__init__(filename, parser, mime, backup, **kwargs) self.allowed = ['ExifTool Version Number', 'File Name', 'Directory', 'File Size', 'File Modification Date/Time', 'File Permissions', 'File Type', 'MIME Type', 'Image Width', 'Image Height', diff --git a/MAT/mat.py b/MAT/mat.py index 4c12333..84f26f2 100644 --- a/MAT/mat.py +++ b/MAT/mat.py @@ -102,7 +102,7 @@ def secure_remove(filename): logging.error('Unable to remove %s' % filename) -def create_class_file(name, backup, add2archive): +def create_class_file(name, backup, **kwargs): ''' return a $FILETYPEStripper() class, corresponding to the filetype of the given file @@ -149,4 +149,7 @@ def create_class_file(name, backup, add2archive): logging.info('Don\'t have stripper for %s format' % mime) return None - return stripper_class(filename, parser, mime, backup, add2archive) + if mime.endswith('pdf') and mime.startswith('application/'): + return stripper_class(filename, parser, mime, backup, **kwargs) + + return stripper_class(filename, parser, mime, backup, **kwargs) diff --git a/MAT/misc.py b/MAT/misc.py index d084861..1ffc327 100644 --- a/MAT/misc.py +++ b/MAT/misc.py @@ -12,9 +12,8 @@ class TorrentStripper(parser.GenericParser): Represent a torrent file with the help of the bencode lib from Petru Paler ''' - def __init__(self, filename, parser, mime, backup, add2archive): - super(TorrentStripper, self).__init__(filename, parser, mime, - backup, add2archive) + def __init__(self, filename, parser, mime, backup, **kwargs): + super(TorrentStripper, self).__init__(filename, parser, mime, backup) self.fields = ['comment', 'creation date', 'created by'] def is_clean(self): diff --git a/MAT/office.py b/MAT/office.py index d14125b..190a6d4 100644 --- a/MAT/office.py +++ b/MAT/office.py @@ -107,8 +107,9 @@ class OpenDocumentStripper(archive.GenericArchiveStripper): try: zipin.getinfo('meta.xml') except KeyError: # no meta.xml in the file + kwargs = {'backup':self.backup, 'add2archive':self.add2archive} czf = archive.ZipStripper(self.filename, self.parser, - 'application/zip', self.backup, self.add2archive) + 'application/zip', **kwargs) if czf.is_clean(): zipin.close() return True @@ -120,11 +121,11 @@ class PdfStripper(parser.GenericParser): ''' Represent a PDF file ''' - def __init__(self, filename, parser, mime, backup, add2archive): - super(PdfStripper, self).__init__(filename, parser, mime, backup, - add2archive) + def __init__(self, filename, parser, mime, backup, **kwargs): + super(PdfStripper, self).__init__(filename, parser, mime, backup, **kwargs) uri = 'file://' + os.path.abspath(self.filename) self.password = None + self.quality = kwargs['low_pdf_quality'] self.document = poppler.document_new_from_file(uri, self.password) self.meta_list = frozenset(['title', 'author', 'subject', 'keywords', 'creator', 'producer', 'metadata']) @@ -161,7 +162,10 @@ class PdfStripper(parser.GenericParser): for pagenum in xrange(self.document.get_n_pages()): page = self.document.get_page(pagenum) context.translate(0, 0) - page.render_for_printing(context) # render the page on context + if self.quality: + page.render(context) # render the page on context + else: + page.render_for_printing(context) # render the page on context context.show_page() # draw context on surface surface.finish() @@ -175,7 +179,6 @@ class PdfStripper(parser.GenericParser): writer.trailer = trailer writer.write(self.output) self.do_backup() - return True except: print('Unable to remove all metadata from %s, please install\ pdfrw' % self.output) @@ -248,8 +251,9 @@ class OpenXmlStripper(archive.GenericArchiveStripper): if item.startswith('docProps/'): return False zipin.close() + kwargs = {'backup':self.backup, 'add2archive':self.add2archive} czf = archive.ZipStripper(self.filename, self.parser, - 'application/zip', self.backup, self.add2archive) + 'application/zip', **kwargs) return czf.is_clean() def get_meta(self): diff --git a/MAT/parser.py b/MAT/parser.py index 4427b01..6be2b03 100644 --- a/MAT/parser.py +++ b/MAT/parser.py @@ -23,7 +23,7 @@ class GenericParser(object): ''' Parent class of all parsers ''' - def __init__(self, filename, parser, mime, backup, add2archive): + def __init__(self, filename, parser, mime, backup, **kwargs): self.filename = '' self.parser = parser self.mime = mime -- cgit v1.3