summaryrefslogtreecommitdiff
path: root/MAT
diff options
context:
space:
mode:
authorjvoisin2012-12-19 21:17:02 +0100
committerjvoisin2012-12-19 21:17:02 +0100
commitb0b30b8ed51d6de3671c0b133edd6f7f3230282b (patch)
treee28a33c1c1eb0e3dfa81d0a21c586a003db54b2a /MAT
parent99f982f844aa75cfd895e16bc6f3dcac778e31ac (diff)
Preliminary support for choosing the quality of the produced PDF
Diffstat (limited to 'MAT')
-rw-r--r--MAT/archive.py25
-rw-r--r--MAT/exiftool.py5
-rw-r--r--MAT/mat.py7
-rw-r--r--MAT/misc.py5
-rw-r--r--MAT/office.py18
-rw-r--r--MAT/parser.py2
6 files changed, 32 insertions, 30 deletions
diff --git a/MAT/archive.py b/MAT/archive.py
index f78a2a7..69c8f1b 100644
--- a/MAT/archive.py
+++ b/MAT/archive.py
@@ -17,11 +17,10 @@ class GenericArchiveStripper(parser.GenericParser):
17 ''' 17 '''
18 Represent a generic archive 18 Represent a generic archive
19 ''' 19 '''
20 def __init__(self, filename, parser, mime, backup, add2archive): 20 def __init__(self, filename, parser, mime, backup, **kwargs):
21 super(GenericArchiveStripper, self).__init__(filename, parser, mime, 21 super(GenericArchiveStripper, self).__init__(filename, parser, mime, backup, **kwargs)
22 backup, add2archive)
23 self.compression = '' 22 self.compression = ''
24 self.add2archive = add2archive 23 self.add2archive = kwargs['add2archive']
25 self.tempdir = tempfile.mkdtemp() 24 self.tempdir = tempfile.mkdtemp()
26 25
27 def __del__(self): 26 def __del__(self):
@@ -81,7 +80,7 @@ class ZipStripper(GenericArchiveStripper):
81 if os.path.isfile(name): 80 if os.path.isfile(name):
82 try: 81 try:
83 cfile = mat.create_class_file(name, False, 82 cfile = mat.create_class_file(name, False,
84 self.add2archive) 83 add2archive=self.add2archive)
85 if not cfile.is_clean(): 84 if not cfile.is_clean():
86 return False 85 return False
87 except: 86 except:
@@ -129,7 +128,7 @@ harmless format' % item.filename)
129 if os.path.isfile(name): 128 if os.path.isfile(name):
130 try: 129 try:
131 cfile = mat.create_class_file(name, False, 130 cfile = mat.create_class_file(name, False,
132 self.add2archive) 131 add2archive=self.add2archive)
133 cfile.remove_all() 132 cfile.remove_all()
134 logging.debug('Processing %s from %s' % (item.filename, 133 logging.debug('Processing %s from %s' % (item.filename,
135 self.filename)) 134 self.filename))
@@ -173,7 +172,7 @@ class TarStripper(GenericArchiveStripper):
173 #no backup file 172 #no backup file
174 try: 173 try:
175 cfile = mat.create_class_file(name, False, 174 cfile = mat.create_class_file(name, False,
176 self.add2archive) 175 add2archive=self.add2archive)
177 cfile.remove_all() 176 cfile.remove_all()
178 tarout.add(name, item.name, filter=self._remove) 177 tarout.add(name, item.name, filter=self._remove)
179 except: 178 except:
@@ -218,7 +217,7 @@ class TarStripper(GenericArchiveStripper):
218 if item.type == '0': # is item a regular file ? 217 if item.type == '0': # is item a regular file ?
219 try: 218 try:
220 class_file = mat.create_class_file(name, 219 class_file = mat.create_class_file(name,
221 False, self.add2archive) # no backup file 220 False, add2archive=self.add2archive) # no backup file
222 if not class_file.is_clean(): 221 if not class_file.is_clean():
223 tarin.close() 222 tarin.close()
224 return False 223 return False
@@ -256,9 +255,8 @@ class GzipStripper(TarStripper):
256 ''' 255 '''
257 Represent a tar.gz archive 256 Represent a tar.gz archive
258 ''' 257 '''
259 def __init__(self, filename, parser, mime, backup, add2archive): 258 def __init__(self, filename, parser, mime, backup, **kwargs):
260 super(GzipStripper, self).__init__(filename, parser, mime, backup, 259 super(GzipStripper, self).__init__(filename, parser, mime, backup, **kwargs)
261 add2archive)
262 self.compression = ':gz' 260 self.compression = ':gz'
263 261
264 262
@@ -266,7 +264,6 @@ class Bzip2Stripper(TarStripper):
266 ''' 264 '''
267 Represents a tar.bz2 archive 265 Represents a tar.bz2 archive
268 ''' 266 '''
269 def __init__(self, filename, parser, mime, backup, add2archive): 267 def __init__(self, filename, parser, mime, backup, **kwargs):
270 super(Bzip2Stripper, self).__init__(filename, parser, mime, backup, 268 super(Bzip2Stripper, self).__init__(filename, parser, mime, backup, **kwargs)
271 add2archive)
272 self.compression = ':bz2' 269 self.compression = ':bz2'
diff --git a/MAT/exiftool.py b/MAT/exiftool.py
index 758a094..eeefcb8 100644
--- a/MAT/exiftool.py
+++ b/MAT/exiftool.py
@@ -11,9 +11,8 @@ class ExiftoolStripper(parser.GenericParser):
11 A generic stripper class using exiftool as backend 11 A generic stripper class using exiftool as backend
12 ''' 12 '''
13 13
14 def __init__(self, filename, parser, mime, backup, add2archive): 14 def __init__(self, filename, parser, mime, backup, **kwargs):
15 super(ExiftoolStripper, self).__init__(filename, parser, mime, 15 super(ExiftoolStripper, self).__init__(filename, parser, mime, backup, **kwargs)
16 backup, add2archive)
17 self.allowed = ['ExifTool Version Number', 'File Name', 'Directory', 16 self.allowed = ['ExifTool Version Number', 'File Name', 'Directory',
18 'File Size', 'File Modification Date/Time', 'File Permissions', 17 'File Size', 'File Modification Date/Time', 'File Permissions',
19 'File Type', 'MIME Type', 'Image Width', 'Image Height', 18 'File Type', 'MIME Type', 'Image Width', 'Image Height',
diff --git a/MAT/mat.py b/MAT/mat.py
index 4c12333..84f26f2 100644
--- a/MAT/mat.py
+++ b/MAT/mat.py
@@ -102,7 +102,7 @@ def secure_remove(filename):
102 logging.error('Unable to remove %s' % filename) 102 logging.error('Unable to remove %s' % filename)
103 103
104 104
105def create_class_file(name, backup, add2archive): 105def create_class_file(name, backup, **kwargs):
106 ''' 106 '''
107 return a $FILETYPEStripper() class, 107 return a $FILETYPEStripper() class,
108 corresponding to the filetype of the given file 108 corresponding to the filetype of the given file
@@ -149,4 +149,7 @@ def create_class_file(name, backup, add2archive):
149 logging.info('Don\'t have stripper for %s format' % mime) 149 logging.info('Don\'t have stripper for %s format' % mime)
150 return None 150 return None
151 151
152 return stripper_class(filename, parser, mime, backup, add2archive) 152 if mime.endswith('pdf') and mime.startswith('application/'):
153 return stripper_class(filename, parser, mime, backup, **kwargs)
154
155 return stripper_class(filename, parser, mime, backup, **kwargs)
diff --git a/MAT/misc.py b/MAT/misc.py
index d084861..1ffc327 100644
--- a/MAT/misc.py
+++ b/MAT/misc.py
@@ -12,9 +12,8 @@ class TorrentStripper(parser.GenericParser):
12 Represent a torrent file with the help 12 Represent a torrent file with the help
13 of the bencode lib from Petru Paler 13 of the bencode lib from Petru Paler
14 ''' 14 '''
15 def __init__(self, filename, parser, mime, backup, add2archive): 15 def __init__(self, filename, parser, mime, backup, **kwargs):
16 super(TorrentStripper, self).__init__(filename, parser, mime, 16 super(TorrentStripper, self).__init__(filename, parser, mime, backup)
17 backup, add2archive)
18 self.fields = ['comment', 'creation date', 'created by'] 17 self.fields = ['comment', 'creation date', 'created by']
19 18
20 def is_clean(self): 19 def is_clean(self):
diff --git a/MAT/office.py b/MAT/office.py
index d14125b..190a6d4 100644
--- a/MAT/office.py
+++ b/MAT/office.py
@@ -107,8 +107,9 @@ class OpenDocumentStripper(archive.GenericArchiveStripper):
107 try: 107 try:
108 zipin.getinfo('meta.xml') 108 zipin.getinfo('meta.xml')
109 except KeyError: # no meta.xml in the file 109 except KeyError: # no meta.xml in the file
110 kwargs = {'backup':self.backup, 'add2archive':self.add2archive}
110 czf = archive.ZipStripper(self.filename, self.parser, 111 czf = archive.ZipStripper(self.filename, self.parser,
111 'application/zip', self.backup, self.add2archive) 112 'application/zip', **kwargs)
112 if czf.is_clean(): 113 if czf.is_clean():
113 zipin.close() 114 zipin.close()
114 return True 115 return True
@@ -120,11 +121,11 @@ class PdfStripper(parser.GenericParser):
120 ''' 121 '''
121 Represent a PDF file 122 Represent a PDF file
122 ''' 123 '''
123 def __init__(self, filename, parser, mime, backup, add2archive): 124 def __init__(self, filename, parser, mime, backup, **kwargs):
124 super(PdfStripper, self).__init__(filename, parser, mime, backup, 125 super(PdfStripper, self).__init__(filename, parser, mime, backup, **kwargs)
125 add2archive)
126 uri = 'file://' + os.path.abspath(self.filename) 126 uri = 'file://' + os.path.abspath(self.filename)
127 self.password = None 127 self.password = None
128 self.quality = kwargs['low_pdf_quality']
128 self.document = poppler.document_new_from_file(uri, self.password) 129 self.document = poppler.document_new_from_file(uri, self.password)
129 self.meta_list = frozenset(['title', 'author', 'subject', 'keywords', 'creator', 130 self.meta_list = frozenset(['title', 'author', 'subject', 'keywords', 'creator',
130 'producer', 'metadata']) 131 'producer', 'metadata'])
@@ -161,7 +162,10 @@ class PdfStripper(parser.GenericParser):
161 for pagenum in xrange(self.document.get_n_pages()): 162 for pagenum in xrange(self.document.get_n_pages()):
162 page = self.document.get_page(pagenum) 163 page = self.document.get_page(pagenum)
163 context.translate(0, 0) 164 context.translate(0, 0)
164 page.render_for_printing(context) # render the page on context 165 if self.quality:
166 page.render(context) # render the page on context
167 else:
168 page.render_for_printing(context) # render the page on context
165 context.show_page() # draw context on surface 169 context.show_page() # draw context on surface
166 surface.finish() 170 surface.finish()
167 171
@@ -175,7 +179,6 @@ class PdfStripper(parser.GenericParser):
175 writer.trailer = trailer 179 writer.trailer = trailer
176 writer.write(self.output) 180 writer.write(self.output)
177 self.do_backup() 181 self.do_backup()
178 return True
179 except: 182 except:
180 print('Unable to remove all metadata from %s, please install\ 183 print('Unable to remove all metadata from %s, please install\
181pdfrw' % self.output) 184pdfrw' % self.output)
@@ -248,8 +251,9 @@ class OpenXmlStripper(archive.GenericArchiveStripper):
248 if item.startswith('docProps/'): 251 if item.startswith('docProps/'):
249 return False 252 return False
250 zipin.close() 253 zipin.close()
254 kwargs = {'backup':self.backup, 'add2archive':self.add2archive}
251 czf = archive.ZipStripper(self.filename, self.parser, 255 czf = archive.ZipStripper(self.filename, self.parser,
252 'application/zip', self.backup, self.add2archive) 256 'application/zip', **kwargs)
253 return czf.is_clean() 257 return czf.is_clean()
254 258
255 def get_meta(self): 259 def get_meta(self):
diff --git a/MAT/parser.py b/MAT/parser.py
index 4427b01..6be2b03 100644
--- a/MAT/parser.py
+++ b/MAT/parser.py
@@ -23,7 +23,7 @@ class GenericParser(object):
23 ''' 23 '''
24 Parent class of all parsers 24 Parent class of all parsers
25 ''' 25 '''
26 def __init__(self, filename, parser, mime, backup, add2archive): 26 def __init__(self, filename, parser, mime, backup, **kwargs):
27 self.filename = '' 27 self.filename = ''
28 self.parser = parser 28 self.parser = parser
29 self.mime = mime 29 self.mime = mime