diff options
| author | jvoisin | 2012-12-19 21:17:02 +0100 |
|---|---|---|
| committer | jvoisin | 2012-12-19 21:17:02 +0100 |
| commit | b0b30b8ed51d6de3671c0b133edd6f7f3230282b (patch) | |
| tree | e28a33c1c1eb0e3dfa81d0a21c586a003db54b2a /MAT | |
| parent | 99f982f844aa75cfd895e16bc6f3dcac778e31ac (diff) | |
Preliminary support for choosing the quality of the produced PDF
Diffstat (limited to 'MAT')
| -rw-r--r-- | MAT/archive.py | 25 | ||||
| -rw-r--r-- | MAT/exiftool.py | 5 | ||||
| -rw-r--r-- | MAT/mat.py | 7 | ||||
| -rw-r--r-- | MAT/misc.py | 5 | ||||
| -rw-r--r-- | MAT/office.py | 18 | ||||
| -rw-r--r-- | MAT/parser.py | 2 |
6 files changed, 32 insertions, 30 deletions
diff --git a/MAT/archive.py b/MAT/archive.py index f78a2a7..69c8f1b 100644 --- a/MAT/archive.py +++ b/MAT/archive.py | |||
| @@ -17,11 +17,10 @@ class GenericArchiveStripper(parser.GenericParser): | |||
| 17 | ''' | 17 | ''' |
| 18 | Represent a generic archive | 18 | Represent a generic archive |
| 19 | ''' | 19 | ''' |
| 20 | def __init__(self, filename, parser, mime, backup, add2archive): | 20 | def __init__(self, filename, parser, mime, backup, **kwargs): |
| 21 | super(GenericArchiveStripper, self).__init__(filename, parser, mime, | 21 | super(GenericArchiveStripper, self).__init__(filename, parser, mime, backup, **kwargs) |
| 22 | backup, add2archive) | ||
| 23 | self.compression = '' | 22 | self.compression = '' |
| 24 | self.add2archive = add2archive | 23 | self.add2archive = kwargs['add2archive'] |
| 25 | self.tempdir = tempfile.mkdtemp() | 24 | self.tempdir = tempfile.mkdtemp() |
| 26 | 25 | ||
| 27 | def __del__(self): | 26 | def __del__(self): |
| @@ -81,7 +80,7 @@ class ZipStripper(GenericArchiveStripper): | |||
| 81 | if os.path.isfile(name): | 80 | if os.path.isfile(name): |
| 82 | try: | 81 | try: |
| 83 | cfile = mat.create_class_file(name, False, | 82 | cfile = mat.create_class_file(name, False, |
| 84 | self.add2archive) | 83 | add2archive=self.add2archive) |
| 85 | if not cfile.is_clean(): | 84 | if not cfile.is_clean(): |
| 86 | return False | 85 | return False |
| 87 | except: | 86 | except: |
| @@ -129,7 +128,7 @@ harmless format' % item.filename) | |||
| 129 | if os.path.isfile(name): | 128 | if os.path.isfile(name): |
| 130 | try: | 129 | try: |
| 131 | cfile = mat.create_class_file(name, False, | 130 | cfile = mat.create_class_file(name, False, |
| 132 | self.add2archive) | 131 | add2archive=self.add2archive) |
| 133 | cfile.remove_all() | 132 | cfile.remove_all() |
| 134 | logging.debug('Processing %s from %s' % (item.filename, | 133 | logging.debug('Processing %s from %s' % (item.filename, |
| 135 | self.filename)) | 134 | self.filename)) |
| @@ -173,7 +172,7 @@ class TarStripper(GenericArchiveStripper): | |||
| 173 | #no backup file | 172 | #no backup file |
| 174 | try: | 173 | try: |
| 175 | cfile = mat.create_class_file(name, False, | 174 | cfile = mat.create_class_file(name, False, |
| 176 | self.add2archive) | 175 | add2archive=self.add2archive) |
| 177 | cfile.remove_all() | 176 | cfile.remove_all() |
| 178 | tarout.add(name, item.name, filter=self._remove) | 177 | tarout.add(name, item.name, filter=self._remove) |
| 179 | except: | 178 | except: |
| @@ -218,7 +217,7 @@ class TarStripper(GenericArchiveStripper): | |||
| 218 | if item.type == '0': # is item a regular file ? | 217 | if item.type == '0': # is item a regular file ? |
| 219 | try: | 218 | try: |
| 220 | class_file = mat.create_class_file(name, | 219 | class_file = mat.create_class_file(name, |
| 221 | False, self.add2archive) # no backup file | 220 | False, add2archive=self.add2archive) # no backup file |
| 222 | if not class_file.is_clean(): | 221 | if not class_file.is_clean(): |
| 223 | tarin.close() | 222 | tarin.close() |
| 224 | return False | 223 | return False |
| @@ -256,9 +255,8 @@ class GzipStripper(TarStripper): | |||
| 256 | ''' | 255 | ''' |
| 257 | Represent a tar.gz archive | 256 | Represent a tar.gz archive |
| 258 | ''' | 257 | ''' |
| 259 | def __init__(self, filename, parser, mime, backup, add2archive): | 258 | def __init__(self, filename, parser, mime, backup, **kwargs): |
| 260 | super(GzipStripper, self).__init__(filename, parser, mime, backup, | 259 | super(GzipStripper, self).__init__(filename, parser, mime, backup, **kwargs) |
| 261 | add2archive) | ||
| 262 | self.compression = ':gz' | 260 | self.compression = ':gz' |
| 263 | 261 | ||
| 264 | 262 | ||
| @@ -266,7 +264,6 @@ class Bzip2Stripper(TarStripper): | |||
| 266 | ''' | 264 | ''' |
| 267 | Represents a tar.bz2 archive | 265 | Represents a tar.bz2 archive |
| 268 | ''' | 266 | ''' |
| 269 | def __init__(self, filename, parser, mime, backup, add2archive): | 267 | def __init__(self, filename, parser, mime, backup, **kwargs): |
| 270 | super(Bzip2Stripper, self).__init__(filename, parser, mime, backup, | 268 | super(Bzip2Stripper, self).__init__(filename, parser, mime, backup, **kwargs) |
| 271 | add2archive) | ||
| 272 | self.compression = ':bz2' | 269 | self.compression = ':bz2' |
diff --git a/MAT/exiftool.py b/MAT/exiftool.py index 758a094..eeefcb8 100644 --- a/MAT/exiftool.py +++ b/MAT/exiftool.py | |||
| @@ -11,9 +11,8 @@ class ExiftoolStripper(parser.GenericParser): | |||
| 11 | A generic stripper class using exiftool as backend | 11 | A generic stripper class using exiftool as backend |
| 12 | ''' | 12 | ''' |
| 13 | 13 | ||
| 14 | def __init__(self, filename, parser, mime, backup, add2archive): | 14 | def __init__(self, filename, parser, mime, backup, **kwargs): |
| 15 | super(ExiftoolStripper, self).__init__(filename, parser, mime, | 15 | super(ExiftoolStripper, self).__init__(filename, parser, mime, backup, **kwargs) |
| 16 | backup, add2archive) | ||
| 17 | self.allowed = ['ExifTool Version Number', 'File Name', 'Directory', | 16 | self.allowed = ['ExifTool Version Number', 'File Name', 'Directory', |
| 18 | 'File Size', 'File Modification Date/Time', 'File Permissions', | 17 | 'File Size', 'File Modification Date/Time', 'File Permissions', |
| 19 | 'File Type', 'MIME Type', 'Image Width', 'Image Height', | 18 | 'File Type', 'MIME Type', 'Image Width', 'Image Height', |
| @@ -102,7 +102,7 @@ def secure_remove(filename): | |||
| 102 | logging.error('Unable to remove %s' % filename) | 102 | logging.error('Unable to remove %s' % filename) |
| 103 | 103 | ||
| 104 | 104 | ||
| 105 | def create_class_file(name, backup, add2archive): | 105 | def create_class_file(name, backup, **kwargs): |
| 106 | ''' | 106 | ''' |
| 107 | return a $FILETYPEStripper() class, | 107 | return a $FILETYPEStripper() class, |
| 108 | corresponding to the filetype of the given file | 108 | corresponding to the filetype of the given file |
| @@ -149,4 +149,7 @@ def create_class_file(name, backup, add2archive): | |||
| 149 | logging.info('Don\'t have stripper for %s format' % mime) | 149 | logging.info('Don\'t have stripper for %s format' % mime) |
| 150 | return None | 150 | return None |
| 151 | 151 | ||
| 152 | return stripper_class(filename, parser, mime, backup, add2archive) | 152 | if mime.endswith('pdf') and mime.startswith('application/'): |
| 153 | return stripper_class(filename, parser, mime, backup, **kwargs) | ||
| 154 | |||
| 155 | return stripper_class(filename, parser, mime, backup, **kwargs) | ||
diff --git a/MAT/misc.py b/MAT/misc.py index d084861..1ffc327 100644 --- a/MAT/misc.py +++ b/MAT/misc.py | |||
| @@ -12,9 +12,8 @@ class TorrentStripper(parser.GenericParser): | |||
| 12 | Represent a torrent file with the help | 12 | Represent a torrent file with the help |
| 13 | of the bencode lib from Petru Paler | 13 | of the bencode lib from Petru Paler |
| 14 | ''' | 14 | ''' |
| 15 | def __init__(self, filename, parser, mime, backup, add2archive): | 15 | def __init__(self, filename, parser, mime, backup, **kwargs): |
| 16 | super(TorrentStripper, self).__init__(filename, parser, mime, | 16 | super(TorrentStripper, self).__init__(filename, parser, mime, backup) |
| 17 | backup, add2archive) | ||
| 18 | self.fields = ['comment', 'creation date', 'created by'] | 17 | self.fields = ['comment', 'creation date', 'created by'] |
| 19 | 18 | ||
| 20 | def is_clean(self): | 19 | def is_clean(self): |
diff --git a/MAT/office.py b/MAT/office.py index d14125b..190a6d4 100644 --- a/MAT/office.py +++ b/MAT/office.py | |||
| @@ -107,8 +107,9 @@ class OpenDocumentStripper(archive.GenericArchiveStripper): | |||
| 107 | try: | 107 | try: |
| 108 | zipin.getinfo('meta.xml') | 108 | zipin.getinfo('meta.xml') |
| 109 | except KeyError: # no meta.xml in the file | 109 | except KeyError: # no meta.xml in the file |
| 110 | kwargs = {'backup':self.backup, 'add2archive':self.add2archive} | ||
| 110 | czf = archive.ZipStripper(self.filename, self.parser, | 111 | czf = archive.ZipStripper(self.filename, self.parser, |
| 111 | 'application/zip', self.backup, self.add2archive) | 112 | 'application/zip', **kwargs) |
| 112 | if czf.is_clean(): | 113 | if czf.is_clean(): |
| 113 | zipin.close() | 114 | zipin.close() |
| 114 | return True | 115 | return True |
| @@ -120,11 +121,11 @@ class PdfStripper(parser.GenericParser): | |||
| 120 | ''' | 121 | ''' |
| 121 | Represent a PDF file | 122 | Represent a PDF file |
| 122 | ''' | 123 | ''' |
| 123 | def __init__(self, filename, parser, mime, backup, add2archive): | 124 | def __init__(self, filename, parser, mime, backup, **kwargs): |
| 124 | super(PdfStripper, self).__init__(filename, parser, mime, backup, | 125 | super(PdfStripper, self).__init__(filename, parser, mime, backup, **kwargs) |
| 125 | add2archive) | ||
| 126 | uri = 'file://' + os.path.abspath(self.filename) | 126 | uri = 'file://' + os.path.abspath(self.filename) |
| 127 | self.password = None | 127 | self.password = None |
| 128 | self.quality = kwargs['low_pdf_quality'] | ||
| 128 | self.document = poppler.document_new_from_file(uri, self.password) | 129 | self.document = poppler.document_new_from_file(uri, self.password) |
| 129 | self.meta_list = frozenset(['title', 'author', 'subject', 'keywords', 'creator', | 130 | self.meta_list = frozenset(['title', 'author', 'subject', 'keywords', 'creator', |
| 130 | 'producer', 'metadata']) | 131 | 'producer', 'metadata']) |
| @@ -161,7 +162,10 @@ class PdfStripper(parser.GenericParser): | |||
| 161 | for pagenum in xrange(self.document.get_n_pages()): | 162 | for pagenum in xrange(self.document.get_n_pages()): |
| 162 | page = self.document.get_page(pagenum) | 163 | page = self.document.get_page(pagenum) |
| 163 | context.translate(0, 0) | 164 | context.translate(0, 0) |
| 164 | page.render_for_printing(context) # render the page on context | 165 | if self.quality: |
| 166 | page.render(context) # render the page on context | ||
| 167 | else: | ||
| 168 | page.render_for_printing(context) # render the page on context | ||
| 165 | context.show_page() # draw context on surface | 169 | context.show_page() # draw context on surface |
| 166 | surface.finish() | 170 | surface.finish() |
| 167 | 171 | ||
| @@ -175,7 +179,6 @@ class PdfStripper(parser.GenericParser): | |||
| 175 | writer.trailer = trailer | 179 | writer.trailer = trailer |
| 176 | writer.write(self.output) | 180 | writer.write(self.output) |
| 177 | self.do_backup() | 181 | self.do_backup() |
| 178 | return True | ||
| 179 | except: | 182 | except: |
| 180 | print('Unable to remove all metadata from %s, please install\ | 183 | print('Unable to remove all metadata from %s, please install\ |
| 181 | pdfrw' % self.output) | 184 | pdfrw' % self.output) |
| @@ -248,8 +251,9 @@ class OpenXmlStripper(archive.GenericArchiveStripper): | |||
| 248 | if item.startswith('docProps/'): | 251 | if item.startswith('docProps/'): |
| 249 | return False | 252 | return False |
| 250 | zipin.close() | 253 | zipin.close() |
| 254 | kwargs = {'backup':self.backup, 'add2archive':self.add2archive} | ||
| 251 | czf = archive.ZipStripper(self.filename, self.parser, | 255 | czf = archive.ZipStripper(self.filename, self.parser, |
| 252 | 'application/zip', self.backup, self.add2archive) | 256 | 'application/zip', **kwargs) |
| 253 | return czf.is_clean() | 257 | return czf.is_clean() |
| 254 | 258 | ||
| 255 | def get_meta(self): | 259 | def get_meta(self): |
diff --git a/MAT/parser.py b/MAT/parser.py index 4427b01..6be2b03 100644 --- a/MAT/parser.py +++ b/MAT/parser.py | |||
| @@ -23,7 +23,7 @@ class GenericParser(object): | |||
| 23 | ''' | 23 | ''' |
| 24 | Parent class of all parsers | 24 | Parent class of all parsers |
| 25 | ''' | 25 | ''' |
| 26 | def __init__(self, filename, parser, mime, backup, add2archive): | 26 | def __init__(self, filename, parser, mime, backup, **kwargs): |
| 27 | self.filename = '' | 27 | self.filename = '' |
| 28 | self.parser = parser | 28 | self.parser = parser |
| 29 | self.mime = mime | 29 | self.mime = mime |
