diff options
| author | jvoisin | 2012-12-19 21:17:02 +0100 |
|---|---|---|
| committer | jvoisin | 2012-12-19 21:17:02 +0100 |
| commit | b0b30b8ed51d6de3671c0b133edd6f7f3230282b (patch) | |
| tree | e28a33c1c1eb0e3dfa81d0a21c586a003db54b2a | |
| parent | 99f982f844aa75cfd895e16bc6f3dcac778e31ac (diff) | |
Preliminary support for choosing the quality of the produced PDF
| -rw-r--r-- | MAT/archive.py | 25 | ||||
| -rw-r--r-- | MAT/exiftool.py | 5 | ||||
| -rw-r--r-- | MAT/mat.py | 7 | ||||
| -rw-r--r-- | MAT/misc.py | 5 | ||||
| -rw-r--r-- | MAT/office.py | 18 | ||||
| -rw-r--r-- | MAT/parser.py | 2 | ||||
| -rwxr-xr-x | mat | 4 | ||||
| -rwxr-xr-x | mat-gui | 19 |
8 files changed, 49 insertions, 36 deletions
diff --git a/MAT/archive.py b/MAT/archive.py index f78a2a7..69c8f1b 100644 --- a/MAT/archive.py +++ b/MAT/archive.py | |||
| @@ -17,11 +17,10 @@ class GenericArchiveStripper(parser.GenericParser): | |||
| 17 | ''' | 17 | ''' |
| 18 | Represent a generic archive | 18 | Represent a generic archive |
| 19 | ''' | 19 | ''' |
| 20 | def __init__(self, filename, parser, mime, backup, add2archive): | 20 | def __init__(self, filename, parser, mime, backup, **kwargs): |
| 21 | super(GenericArchiveStripper, self).__init__(filename, parser, mime, | 21 | super(GenericArchiveStripper, self).__init__(filename, parser, mime, backup, **kwargs) |
| 22 | backup, add2archive) | ||
| 23 | self.compression = '' | 22 | self.compression = '' |
| 24 | self.add2archive = add2archive | 23 | self.add2archive = kwargs['add2archive'] |
| 25 | self.tempdir = tempfile.mkdtemp() | 24 | self.tempdir = tempfile.mkdtemp() |
| 26 | 25 | ||
| 27 | def __del__(self): | 26 | def __del__(self): |
| @@ -81,7 +80,7 @@ class ZipStripper(GenericArchiveStripper): | |||
| 81 | if os.path.isfile(name): | 80 | if os.path.isfile(name): |
| 82 | try: | 81 | try: |
| 83 | cfile = mat.create_class_file(name, False, | 82 | cfile = mat.create_class_file(name, False, |
| 84 | self.add2archive) | 83 | add2archive=self.add2archive) |
| 85 | if not cfile.is_clean(): | 84 | if not cfile.is_clean(): |
| 86 | return False | 85 | return False |
| 87 | except: | 86 | except: |
| @@ -129,7 +128,7 @@ harmless format' % item.filename) | |||
| 129 | if os.path.isfile(name): | 128 | if os.path.isfile(name): |
| 130 | try: | 129 | try: |
| 131 | cfile = mat.create_class_file(name, False, | 130 | cfile = mat.create_class_file(name, False, |
| 132 | self.add2archive) | 131 | add2archive=self.add2archive) |
| 133 | cfile.remove_all() | 132 | cfile.remove_all() |
| 134 | logging.debug('Processing %s from %s' % (item.filename, | 133 | logging.debug('Processing %s from %s' % (item.filename, |
| 135 | self.filename)) | 134 | self.filename)) |
| @@ -173,7 +172,7 @@ class TarStripper(GenericArchiveStripper): | |||
| 173 | #no backup file | 172 | #no backup file |
| 174 | try: | 173 | try: |
| 175 | cfile = mat.create_class_file(name, False, | 174 | cfile = mat.create_class_file(name, False, |
| 176 | self.add2archive) | 175 | add2archive=self.add2archive) |
| 177 | cfile.remove_all() | 176 | cfile.remove_all() |
| 178 | tarout.add(name, item.name, filter=self._remove) | 177 | tarout.add(name, item.name, filter=self._remove) |
| 179 | except: | 178 | except: |
| @@ -218,7 +217,7 @@ class TarStripper(GenericArchiveStripper): | |||
| 218 | if item.type == '0': # is item a regular file ? | 217 | if item.type == '0': # is item a regular file ? |
| 219 | try: | 218 | try: |
| 220 | class_file = mat.create_class_file(name, | 219 | class_file = mat.create_class_file(name, |
| 221 | False, self.add2archive) # no backup file | 220 | False, add2archive=self.add2archive) # no backup file |
| 222 | if not class_file.is_clean(): | 221 | if not class_file.is_clean(): |
| 223 | tarin.close() | 222 | tarin.close() |
| 224 | return False | 223 | return False |
| @@ -256,9 +255,8 @@ class GzipStripper(TarStripper): | |||
| 256 | ''' | 255 | ''' |
| 257 | Represent a tar.gz archive | 256 | Represent a tar.gz archive |
| 258 | ''' | 257 | ''' |
| 259 | def __init__(self, filename, parser, mime, backup, add2archive): | 258 | def __init__(self, filename, parser, mime, backup, **kwargs): |
| 260 | super(GzipStripper, self).__init__(filename, parser, mime, backup, | 259 | super(GzipStripper, self).__init__(filename, parser, mime, backup, **kwargs) |
| 261 | add2archive) | ||
| 262 | self.compression = ':gz' | 260 | self.compression = ':gz' |
| 263 | 261 | ||
| 264 | 262 | ||
| @@ -266,7 +264,6 @@ class Bzip2Stripper(TarStripper): | |||
| 266 | ''' | 264 | ''' |
| 267 | Represents a tar.bz2 archive | 265 | Represents a tar.bz2 archive |
| 268 | ''' | 266 | ''' |
| 269 | def __init__(self, filename, parser, mime, backup, add2archive): | 267 | def __init__(self, filename, parser, mime, backup, **kwargs): |
| 270 | super(Bzip2Stripper, self).__init__(filename, parser, mime, backup, | 268 | super(Bzip2Stripper, self).__init__(filename, parser, mime, backup, **kwargs) |
| 271 | add2archive) | ||
| 272 | self.compression = ':bz2' | 269 | self.compression = ':bz2' |
diff --git a/MAT/exiftool.py b/MAT/exiftool.py index 758a094..eeefcb8 100644 --- a/MAT/exiftool.py +++ b/MAT/exiftool.py | |||
| @@ -11,9 +11,8 @@ class ExiftoolStripper(parser.GenericParser): | |||
| 11 | A generic stripper class using exiftool as backend | 11 | A generic stripper class using exiftool as backend |
| 12 | ''' | 12 | ''' |
| 13 | 13 | ||
| 14 | def __init__(self, filename, parser, mime, backup, add2archive): | 14 | def __init__(self, filename, parser, mime, backup, **kwargs): |
| 15 | super(ExiftoolStripper, self).__init__(filename, parser, mime, | 15 | super(ExiftoolStripper, self).__init__(filename, parser, mime, backup, **kwargs) |
| 16 | backup, add2archive) | ||
| 17 | self.allowed = ['ExifTool Version Number', 'File Name', 'Directory', | 16 | self.allowed = ['ExifTool Version Number', 'File Name', 'Directory', |
| 18 | 'File Size', 'File Modification Date/Time', 'File Permissions', | 17 | 'File Size', 'File Modification Date/Time', 'File Permissions', |
| 19 | 'File Type', 'MIME Type', 'Image Width', 'Image Height', | 18 | 'File Type', 'MIME Type', 'Image Width', 'Image Height', |
| @@ -102,7 +102,7 @@ def secure_remove(filename): | |||
| 102 | logging.error('Unable to remove %s' % filename) | 102 | logging.error('Unable to remove %s' % filename) |
| 103 | 103 | ||
| 104 | 104 | ||
| 105 | def create_class_file(name, backup, add2archive): | 105 | def create_class_file(name, backup, **kwargs): |
| 106 | ''' | 106 | ''' |
| 107 | return a $FILETYPEStripper() class, | 107 | return a $FILETYPEStripper() class, |
| 108 | corresponding to the filetype of the given file | 108 | corresponding to the filetype of the given file |
| @@ -149,4 +149,7 @@ def create_class_file(name, backup, add2archive): | |||
| 149 | logging.info('Don\'t have stripper for %s format' % mime) | 149 | logging.info('Don\'t have stripper for %s format' % mime) |
| 150 | return None | 150 | return None |
| 151 | 151 | ||
| 152 | return stripper_class(filename, parser, mime, backup, add2archive) | 152 | if mime.endswith('pdf') and mime.startswith('application/'): |
| 153 | return stripper_class(filename, parser, mime, backup, **kwargs) | ||
| 154 | |||
| 155 | return stripper_class(filename, parser, mime, backup, **kwargs) | ||
diff --git a/MAT/misc.py b/MAT/misc.py index d084861..1ffc327 100644 --- a/MAT/misc.py +++ b/MAT/misc.py | |||
| @@ -12,9 +12,8 @@ class TorrentStripper(parser.GenericParser): | |||
| 12 | Represent a torrent file with the help | 12 | Represent a torrent file with the help |
| 13 | of the bencode lib from Petru Paler | 13 | of the bencode lib from Petru Paler |
| 14 | ''' | 14 | ''' |
| 15 | def __init__(self, filename, parser, mime, backup, add2archive): | 15 | def __init__(self, filename, parser, mime, backup, **kwargs): |
| 16 | super(TorrentStripper, self).__init__(filename, parser, mime, | 16 | super(TorrentStripper, self).__init__(filename, parser, mime, backup) |
| 17 | backup, add2archive) | ||
| 18 | self.fields = ['comment', 'creation date', 'created by'] | 17 | self.fields = ['comment', 'creation date', 'created by'] |
| 19 | 18 | ||
| 20 | def is_clean(self): | 19 | def is_clean(self): |
diff --git a/MAT/office.py b/MAT/office.py index d14125b..190a6d4 100644 --- a/MAT/office.py +++ b/MAT/office.py | |||
| @@ -107,8 +107,9 @@ class OpenDocumentStripper(archive.GenericArchiveStripper): | |||
| 107 | try: | 107 | try: |
| 108 | zipin.getinfo('meta.xml') | 108 | zipin.getinfo('meta.xml') |
| 109 | except KeyError: # no meta.xml in the file | 109 | except KeyError: # no meta.xml in the file |
| 110 | kwargs = {'backup':self.backup, 'add2archive':self.add2archive} | ||
| 110 | czf = archive.ZipStripper(self.filename, self.parser, | 111 | czf = archive.ZipStripper(self.filename, self.parser, |
| 111 | 'application/zip', self.backup, self.add2archive) | 112 | 'application/zip', **kwargs) |
| 112 | if czf.is_clean(): | 113 | if czf.is_clean(): |
| 113 | zipin.close() | 114 | zipin.close() |
| 114 | return True | 115 | return True |
| @@ -120,11 +121,11 @@ class PdfStripper(parser.GenericParser): | |||
| 120 | ''' | 121 | ''' |
| 121 | Represent a PDF file | 122 | Represent a PDF file |
| 122 | ''' | 123 | ''' |
| 123 | def __init__(self, filename, parser, mime, backup, add2archive): | 124 | def __init__(self, filename, parser, mime, backup, **kwargs): |
| 124 | super(PdfStripper, self).__init__(filename, parser, mime, backup, | 125 | super(PdfStripper, self).__init__(filename, parser, mime, backup, **kwargs) |
| 125 | add2archive) | ||
| 126 | uri = 'file://' + os.path.abspath(self.filename) | 126 | uri = 'file://' + os.path.abspath(self.filename) |
| 127 | self.password = None | 127 | self.password = None |
| 128 | self.quality = kwargs['low_pdf_quality'] | ||
| 128 | self.document = poppler.document_new_from_file(uri, self.password) | 129 | self.document = poppler.document_new_from_file(uri, self.password) |
| 129 | self.meta_list = frozenset(['title', 'author', 'subject', 'keywords', 'creator', | 130 | self.meta_list = frozenset(['title', 'author', 'subject', 'keywords', 'creator', |
| 130 | 'producer', 'metadata']) | 131 | 'producer', 'metadata']) |
| @@ -161,7 +162,10 @@ class PdfStripper(parser.GenericParser): | |||
| 161 | for pagenum in xrange(self.document.get_n_pages()): | 162 | for pagenum in xrange(self.document.get_n_pages()): |
| 162 | page = self.document.get_page(pagenum) | 163 | page = self.document.get_page(pagenum) |
| 163 | context.translate(0, 0) | 164 | context.translate(0, 0) |
| 164 | page.render_for_printing(context) # render the page on context | 165 | if self.quality: |
| 166 | page.render(context) # render the page on context | ||
| 167 | else: | ||
| 168 | page.render_for_printing(context) # render the page on context | ||
| 165 | context.show_page() # draw context on surface | 169 | context.show_page() # draw context on surface |
| 166 | surface.finish() | 170 | surface.finish() |
| 167 | 171 | ||
| @@ -175,7 +179,6 @@ class PdfStripper(parser.GenericParser): | |||
| 175 | writer.trailer = trailer | 179 | writer.trailer = trailer |
| 176 | writer.write(self.output) | 180 | writer.write(self.output) |
| 177 | self.do_backup() | 181 | self.do_backup() |
| 178 | return True | ||
| 179 | except: | 182 | except: |
| 180 | print('Unable to remove all metadata from %s, please install\ | 183 | print('Unable to remove all metadata from %s, please install\ |
| 181 | pdfrw' % self.output) | 184 | pdfrw' % self.output) |
| @@ -248,8 +251,9 @@ class OpenXmlStripper(archive.GenericArchiveStripper): | |||
| 248 | if item.startswith('docProps/'): | 251 | if item.startswith('docProps/'): |
| 249 | return False | 252 | return False |
| 250 | zipin.close() | 253 | zipin.close() |
| 254 | kwargs = {'backup':self.backup, 'add2archive':self.add2archive} | ||
| 251 | czf = archive.ZipStripper(self.filename, self.parser, | 255 | czf = archive.ZipStripper(self.filename, self.parser, |
| 252 | 'application/zip', self.backup, self.add2archive) | 256 | 'application/zip', **kwargs) |
| 253 | return czf.is_clean() | 257 | return czf.is_clean() |
| 254 | 258 | ||
| 255 | def get_meta(self): | 259 | def get_meta(self): |
diff --git a/MAT/parser.py b/MAT/parser.py index 4427b01..6be2b03 100644 --- a/MAT/parser.py +++ b/MAT/parser.py | |||
| @@ -23,7 +23,7 @@ class GenericParser(object): | |||
| 23 | ''' | 23 | ''' |
| 24 | Parent class of all parsers | 24 | Parent class of all parsers |
| 25 | ''' | 25 | ''' |
| 26 | def __init__(self, filename, parser, mime, backup, add2archive): | 26 | def __init__(self, filename, parser, mime, backup, **kwargs): |
| 27 | self.filename = '' | 27 | self.filename = '' |
| 28 | self.parser = parser | 28 | self.parser = parser |
| 29 | self.mime = mime | 29 | self.mime = mime |
| @@ -26,6 +26,8 @@ The default behaviour is to clean files given in argument') | |||
| 26 | help='Keep a backup copy') | 26 | help='Keep a backup copy') |
| 27 | options.add_option('--force', '-f', action='store_true', default=False, | 27 | options.add_option('--force', '-f', action='store_true', default=False, |
| 28 | help='Don\'t check if files are clean before cleaning') | 28 | help='Don\'t check if files are clean before cleaning') |
| 29 | options.add_option('--low-pdf-quality', '-L', action='store_true', default=False, | ||
| 30 | help='Produces a lighter, but lower quality PDF') | ||
| 29 | 31 | ||
| 30 | info = optparse.OptionGroup(parser, 'Informations') | 32 | info = optparse.OptionGroup(parser, 'Informations') |
| 31 | info.add_option('--check', '-c', action='store_true', default=False, | 33 | info.add_option('--check', '-c', action='store_true', default=False, |
| @@ -135,7 +137,7 @@ def main(): | |||
| 135 | 137 | ||
| 136 | for filename in filenames: | 138 | for filename in filenames: |
| 137 | class_file = mat.create_class_file(filename, args.backup, | 139 | class_file = mat.create_class_file(filename, args.backup, |
| 138 | args.add2archive) | 140 | add2archive=args.add2archive, low_pdf_quality=args.low_pdf_quality) |
| 139 | if class_file: | 141 | if class_file: |
| 140 | func(class_file, filename, args.force) | 142 | func(class_file, filename, args.force) |
| 141 | else: | 143 | else: |
| @@ -30,9 +30,9 @@ class CFile(object): | |||
| 30 | This class exist just to be "around" my parser.Generic_parser class, | 30 | This class exist just to be "around" my parser.Generic_parser class, |
| 31 | since the gtk.ListStore does not accept it. | 31 | since the gtk.ListStore does not accept it. |
| 32 | ''' | 32 | ''' |
| 33 | def __init__(self, filename, backup, add2archive): | 33 | def __init__(self, filename, backup, **kwargs): |
| 34 | try: | 34 | try: |
| 35 | self.file = mat.create_class_file(filename, backup, add2archive) | 35 | self.file = mat.create_class_file(filename, backup, **kwargs) |
| 36 | except: | 36 | except: |
| 37 | self.file = None | 37 | self.file = None |
| 38 | 38 | ||
| @@ -46,6 +46,7 @@ class GUI: | |||
| 46 | self.force = False | 46 | self.force = False |
| 47 | self.backup = True | 47 | self.backup = True |
| 48 | self.add2archive = True | 48 | self.add2archive = True |
| 49 | self.pdf_quality = False | ||
| 49 | 50 | ||
| 50 | # Main window | 51 | # Main window |
| 51 | self.window = gtk.Window() | 52 | self.window = gtk.Window() |
| @@ -290,7 +291,7 @@ class GUI: | |||
| 290 | # if filename does not exist | 291 | # if filename does not exist |
| 291 | return False | 292 | return False |
| 292 | 293 | ||
| 293 | cf = CFile(filename, self.backup, self.add2archive) | 294 | cf = CFile(filename, self.backup, add2archive=self.add2archive, low_pdf_quality=self.pdf_quality) |
| 294 | if cf.file: # if the file is supported by the mat | 295 | if cf.file: # if the file is supported by the mat |
| 295 | self.liststore.append([cf, os.path.dirname(cf.file.filename) + os.path.sep, | 296 | self.liststore.append([cf, os.path.dirname(cf.file.filename) + os.path.sep, |
| 296 | cf.file.basename, cf.file.mime, _('unknow'), 'None']) | 297 | cf.file.basename, cf.file.mime, _('unknow'), 'None']) |
| @@ -469,17 +470,23 @@ cleaning')) | |||
| 469 | backup.set_tooltip_text(_('Keep a backup copy')) | 470 | backup.set_tooltip_text(_('Keep a backup copy')) |
| 470 | table.attach(backup, 0, 1, 1, 2) | 471 | table.attach(backup, 0, 1, 1, 2) |
| 471 | 472 | ||
| 473 | pdf_quality = gtk.CheckButton(_('Reduce PDF quality'), False) | ||
| 474 | pdf_quality.set_active(self.pdf_quality) | ||
| 475 | pdf_quality.connect('toggled', self.__invert, 'pdf_quality') | ||
| 476 | pdf_quality.set_tooltip_text(_('Reduce the produced PDF size and quality')) | ||
| 477 | table.attach(pdf_quality, 0, 1, 2, 3) | ||
| 478 | |||
| 472 | add2archive = gtk.CheckButton(_('Add unsupported file to archives'), | 479 | add2archive = gtk.CheckButton(_('Add unsupported file to archives'), |
| 473 | False) | 480 | False) |
| 474 | add2archive.set_active(self.add2archive) | 481 | add2archive.set_active(self.add2archive) |
| 475 | add2archive.connect('toggled', self.__invert, 'add2archive') | 482 | add2archive.connect('toggled', self.__invert, 'add2archive') |
| 476 | add2archive.set_tooltip_text(_('Add non-supported (and so \ | 483 | add2archive.set_tooltip_text(_('Add non-supported (and so \ |
| 477 | non-anonymised) file to output archive')) | 484 | non-anonymised) file to output archive')) |
| 478 | table.attach(add2archive, 0, 1, 2, 3) | 485 | table.attach(add2archive, 0, 1, 3, 4) |
| 479 | 486 | ||
| 480 | hbox.show_all() | 487 | hbox.show_all() |
| 481 | response = dialog.run() | 488 | response = dialog.run() |
| 482 | if response is 0: # gtk.STOCK_OK | 489 | if response == 0: # gtk.STOCK_OK |
| 483 | dialog.destroy() | 490 | dialog.destroy() |
| 484 | 491 | ||
| 485 | def __invert(self, button, name): | 492 | def __invert(self, button, name): |
| @@ -494,6 +501,8 @@ non-anonymised) file to output archive')) | |||
| 494 | # change the "backup" property of all files | 501 | # change the "backup" property of all files |
| 495 | self.liststore[line][0].file.backup = self.backup | 502 | self.liststore[line][0].file.backup = self.backup |
| 496 | self.treeview.get_column(4).set_visible(self.backup) | 503 | self.treeview.get_column(4).set_visible(self.backup) |
| 504 | elif name == 'pdf_quality': | ||
| 505 | self.pdf_quality = not self.pdf_quality | ||
| 497 | elif name == 'add2archive': | 506 | elif name == 'add2archive': |
| 498 | self.add2archive = not self.add2archive | 507 | self.add2archive = not self.add2archive |
| 499 | 508 | ||
