summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author jvoisin 2012-02-06 02:05:05 +0100
committer jvoisin 2012-02-06 02:05:05 +0100
commit 2cba152e7c00ff2c422d5e1c911f17ea07f346ed (patch)
tree e83a362b8f49f72b0457af7fd566ea37f9815b14
parent c71999c4f789beb8812f9570926f894ac9f1938e (diff)
Merge the two processing modes into a single one
-rw-r--r-- lib/archive.py 31
-rw-r--r-- lib/mat.py 2
-rw-r--r-- lib/office.py 68
-rw-r--r-- lib/parser.py 10
-rwxr-xr-x mat 15
-rwxr-xr-x mat-gui 63
-rw-r--r-- mat.1 3
7 files changed, 32 insertions, 160 deletions
diff --git a/lib/archive.py b/lib/archive.py
index 9993102..a749b29 100644
--- a/lib/archive.py
+++ b/lib/archive.py
@@ -36,22 +36,9 @@ class GenericArchiveStripper(parser.GenericParser):
36 shutil.rmtree(self.tempdir) 36 shutil.rmtree(self.tempdir)
37 37
38 def remove_all(self): 38 def remove_all(self):
39 ''' 39 return self._remove_all()
40 Call _remove_all() with in argument : "normal"
41 '''
42 return self._remove_all('normal')
43 40
44 def remove_all_strict(self): 41 def _remove_all(self):
45 '''
46 call remove_all() with in argument : "strict"
47 '''
48 return self._remove_all('strict')
49
50 def _remove_all(self, method):
51 '''
52 Remove all meta, normal way if method is "normal",
53 else, use the strict way (with possible data loss)
54 '''
55 raise NotImplementedError 42 raise NotImplementedError
56 43
57 44
@@ -127,7 +114,7 @@ harmless format' % item.filename)
127 zipin.close() 114 zipin.close()
128 return metadata 115 return metadata
129 116
130 def _remove_all(self, method): 117 def _remove_all(self):
131 ''' 118 '''
132 So far, the zipfile module does not allow to write a ZipInfo 119 So far, the zipfile module does not allow to write a ZipInfo
133 object into a zipfile (and it's a shame !) : so data added 120 object into a zipfile (and it's a shame !) : so data added
@@ -143,10 +130,7 @@ harmless format' % item.filename)
143 try: 130 try:
144 cfile = mat.create_class_file(name, False, 131 cfile = mat.create_class_file(name, False,
145 self.add2archive) 132 self.add2archive)
146 if method is 'normal': 133 cfile.remove_all()
147 cfile.remove_all()
148 else:
149 cfile.remove_all_strict()
150 logging.debug('Processing %s from %s' % (item.filename, 134 logging.debug('Processing %s from %s' % (item.filename,
151 self.filename)) 135 self.filename))
152 zipout.write(name, item.filename) 136 zipout.write(name, item.filename)
@@ -179,7 +163,7 @@ class TarStripper(GenericArchiveStripper):
179 current_file.gname = '' 163 current_file.gname = ''
180 return current_file 164 return current_file
181 165
182 def _remove_all(self, method): 166 def _remove_all(self):
183 tarin = tarfile.open(self.filename, 'r' + self.compression) 167 tarin = tarfile.open(self.filename, 'r' + self.compression)
184 tarout = tarfile.open(self.output, 'w' + self.compression) 168 tarout = tarfile.open(self.output, 'w' + self.compression)
185 for item in tarin.getmembers(): 169 for item in tarin.getmembers():
@@ -190,10 +174,7 @@ class TarStripper(GenericArchiveStripper):
190 try: 174 try:
191 cfile = mat.create_class_file(name, False, 175 cfile = mat.create_class_file(name, False,
192 self.add2archive) 176 self.add2archive)
193 if method is 'normal': 177 cfile.remove_all()
194 cfile.remove_all()
195 else:
196 cfile.remove_all_strict()
197 tarout.add(name, item.name, filter=self._remove) 178 tarout.add(name, item.name, filter=self._remove)
198 except: 179 except:
199 logging.info('%s\' format is not supported or harmless' % 180 logging.info('%s\' format is not supported or harmless' %
diff --git a/lib/mat.py b/lib/mat.py
index 53d02d8..dfcfc57 100644
--- a/lib/mat.py
+++ b/lib/mat.py
@@ -24,7 +24,7 @@ hachoir_core.config.quiet = True
24fname = '' 24fname = ''
25 25
26#Verbose 26#Verbose
27#LOGGING_LEVEL = logging.DEBUG 27LOGGING_LEVEL = logging.DEBUG
28#hachoir_core.config.quiet = False 28#hachoir_core.config.quiet = False
29#logname = 'report.log' 29#logname = 'report.log'
30 30
diff --git a/lib/office.py b/lib/office.py
index e1d738e..82b817e 100644
--- a/lib/office.py
+++ b/lib/office.py
@@ -49,7 +49,7 @@ class OpenDocumentStripper(archive.GenericArchiveStripper):
49 logging.debug('%s has no opendocument metadata' % self.filename) 49 logging.debug('%s has no opendocument metadata' % self.filename)
50 return metadata 50 return metadata
51 51
52 def _remove_all(self, method): 52 def _remove_all(self):
53 ''' 53 '''
54 FIXME ? 54 FIXME ?
55 There is a patch implementing the Zipfile.remove() 55 There is a patch implementing the Zipfile.remove()
@@ -84,10 +84,7 @@ class OpenDocumentStripper(archive.GenericArchiveStripper):
84 try: 84 try:
85 cfile = mat.create_class_file(name, False, 85 cfile = mat.create_class_file(name, False,
86 self.add2archive) 86 self.add2archive)
87 if method == 'normal': 87 cfile.remove_all()
88 cfile.remove_all()
89 else:
90 cfile.remove_all_strict()
91 logging.debug('Processing %s from %s' % (item, 88 logging.debug('Processing %s from %s' % (item,
92 self.filename)) 89 self.filename))
93 zipout.write(name, item) 90 zipout.write(name, item)
@@ -137,20 +134,17 @@ class PdfStripper(parser.GenericParser):
137 Check if the file is clean from harmful metadatas 134 Check if the file is clean from harmful metadatas
138 ''' 135 '''
139 for key in self.meta_list: 136 for key in self.meta_list:
140 if self.document.get_property(key) is not None and \ 137 if self.document.get_property(key) != None:
141 self.document.get_property(key) != '':
142 return False 138 return False
143 return True 139 return True
144 140
145
146 def remove_all(self): 141 def remove_all(self):
147 ''' 142 '''
148 Remove supperficial 143 Remove supperficial
149 ''' 144 '''
150 return self._remove_meta() 145 return self._remove_meta()
151 146
152 147 def _remove_meta(self):
153 def remove_all_strict(self):
154 ''' 148 '''
155 Opening the PDF with poppler, then doing a render 149 Opening the PDF with poppler, then doing a render
156 on a cairo pdfsurface for each pages. 150 on a cairo pdfsurface for each pages.
@@ -166,54 +160,26 @@ class PdfStripper(parser.GenericParser):
166 for pagenum in xrange(self.document.get_n_pages()): 160 for pagenum in xrange(self.document.get_n_pages()):
167 page = self.document.get_page(pagenum) 161 page = self.document.get_page(pagenum)
168 context.translate(0, 0) 162 context.translate(0, 0)
169 page.render(context) # render the page on context 163 page.render_for_printing(context) # render the page on context
170 context.show_page() # draw context on surface 164 context.show_page() # draw context on surface
171 surface.finish() 165 surface.finish()
172 return self._remove_meta()
173 166
174 def _remove_meta(self): 167 try:
175 '''
176 Remove superficial/external metadata
177 from a PDF file, using exiftool,
178 of pdfrw if exiftool is not installed
179 '''
180 processed = False
181 try:# try with pdfrw
182 import pdfrw 168 import pdfrw
183 #For now, poppler cannot write meta, so we must use pdfrw 169 #For now, poppler cannot write meta, so we must use pdfrw
184 logging.debug('Removing %s\'s superficial metadata' % self.filename) 170 logging.debug('Removing %s\'s superficial metadata' % self.filename)
185 trailer = pdfrw.PdfReader(self.output) 171 trailer = pdfrw.PdfReader(self.output)
186 trailer.Info.Producer = trailer.Author = trailer.Info.Creator = None 172 trailer.Info.Producer = None
173 trailer.Info.Creator = None
187 writer = pdfrw.PdfWriter() 174 writer = pdfrw.PdfWriter()
188 writer.trailer = trailer 175 writer.trailer = trailer
189 writer.write(self.output) 176 writer.write(self.output)
190 self.do_backup() 177 self.do_backup()
191 processed = True 178 return True
192 except:
193 pass
194
195 try: # try with exiftool
196 subprocess.Popen('exiftool', stdout=open('/dev/null'))
197 import exiftool
198 # Note: '-All=' must be followed by a known exiftool option.
199 if self.backup:
200 process = subprocess.Popen(['exiftool', '-m', '-All=',
201 '-out', self.output, self.filename], stdout=open('/dev/null'))
202 process.wait()
203 else:
204 # Note: '-All=' must be followed by a known exiftool option.
205 process = subprocess.Popen(
206 ['exiftool', '-All=', '-overwrite_original', self.filename],
207 stdout=open('/dev/null'))
208 process.wait()
209 processed = True
210 except: 179 except:
211 pass 180 print('Unable to remove all metadata from %s, please install\
212 181 pdfrw' % self.output)
213 if processed is False: 182 return False
214 logging.error('Please install either pdfrw, or exiftool to\
215 fully handle PDF files')
216 return processed
217 183
218 def get_meta(self): 184 def get_meta(self):
219 ''' 185 '''
@@ -221,8 +187,7 @@ class PdfStripper(parser.GenericParser):
221 ''' 187 '''
222 metadata = {} 188 metadata = {}
223 for key in self.meta_list: 189 for key in self.meta_list:
224 if self.document.get_property(key) is not None and \ 190 if self.document.get_property(key) is not None:
225 self.document.get_property(key) != '':
226 metadata[key] = self.document.get_property(key) 191 metadata[key] = self.document.get_property(key)
227 return metadata 192 return metadata
228 193
@@ -234,7 +199,7 @@ class OpenXmlStripper(archive.GenericArchiveStripper):
234 It contains mostly xml, but can have media blobs, crap, ... 199 It contains mostly xml, but can have media blobs, crap, ...
235 (I don't like this format.) 200 (I don't like this format.)
236 ''' 201 '''
237 def _remove_all(self, method): 202 def _remove_all(self):
238 ''' 203 '''
239 FIXME ? 204 FIXME ?
240 There is a patch implementing the Zipfile.remove() 205 There is a patch implementing the Zipfile.remove()
@@ -258,10 +223,7 @@ class OpenXmlStripper(archive.GenericArchiveStripper):
258 try: 223 try:
259 cfile = mat.create_class_file(name, False, 224 cfile = mat.create_class_file(name, False,
260 self.add2archive) 225 self.add2archive)
261 if method == 'normal': 226 cfile.remove_all()
262 cfile.remove_all()
263 else:
264 cfile.remove_all_strict()
265 logging.debug('Processing %s from %s' % (item, 227 logging.debug('Processing %s from %s' % (item,
266 self.filename)) 228 self.filename))
267 zipout.write(name, item) 229 zipout.write(name, item)
diff --git a/lib/parser.py b/lib/parser.py
index 6dc5d0b..d2eaf9c 100644
--- a/lib/parser.py
+++ b/lib/parser.py
@@ -78,16 +78,6 @@ class GenericParser(object):
78 except: 78 except:
79 return False 79 return False
80 80
81 def remove_all_strict(self):
82 '''
83 If the remove_all() is not efficient enough,
84 this method is implemented :
85 It is efficient, but destructive.
86 In a perfect world, with nice fileformat,
87 this method would not exist.
88 '''
89 self.remove_all()
90
91 def _remove(self, fieldset, field): 81 def _remove(self, fieldset, field):
92 ''' 82 '''
93 Delete the given field 83 Delete the given field
diff --git a/mat b/mat
index ef83d84..468c76a 100755
--- a/mat
+++ b/mat
@@ -26,8 +26,6 @@ The default behaviour is to clean files given in argument')
26 help='Keep a backup copy') 26 help='Keep a backup copy')
27 options.add_option('--force', '-f', action='store_true', default=False, 27 options.add_option('--force', '-f', action='store_true', default=False,
28 help='Don\'t check if files are clean before cleaning') 28 help='Don\'t check if files are clean before cleaning')
29 options.add_option('--strict', '-u', action='store_true', default=False,
30 help='Strict cleaning mode : loss can occur')
31 29
32 info = optparse.OptionGroup(parser, 'Informations') 30 info = optparse.OptionGroup(parser, 'Informations')
33 info.add_option('--check', '-c', action='store_true', default=False, 31 info.add_option('--check', '-c', action='store_true', default=False,
@@ -97,17 +95,6 @@ def clean_meta(class_file, filename, force):
97 else: 95 else:
98 print('Unable to clean %s', filename) 96 print('Unable to clean %s', filename)
99 97
100def clean_meta_strict(class_file, filename, force):
101 '''
102 Clean the file 'filename', strict way
103 '''
104 print('[+] Cleaning %s' % filename)
105 if force is False and class_file.is_clean():
106 print('%s is already clean' % filename)
107 else:
108 class_file.remove_all_strict()
109 print('%s cleaned' % filename)
110
111 98
112def list_supported(): 99def list_supported():
113 ''' 100 '''
@@ -142,8 +129,6 @@ def main():
142 func = list_meta 129 func = list_meta
143 elif args.check is True: # only check if the file is clean 130 elif args.check is True: # only check if the file is clean
144 func = is_clean 131 func = is_clean
145 elif args.strict is True: # destructive anonymisation method
146 func = clean_meta_strict
147 elif args.list is True: # print the list of all supported format 132 elif args.list is True: # print the list of all supported format
148 list_supported() 133 list_supported()
149 else: # clean the file 134 else: # clean the file
diff --git a/mat-gui b/mat-gui
index db007e5..5c28732 100755
--- a/mat-gui
+++ b/mat-gui
@@ -103,18 +103,9 @@ class GUI:
103 toolbar.add(toolbutton) 103 toolbar.add(toolbutton)
104 104
105 toolbutton = gtk.ToolButton(gtk.STOCK_PRINT_REPORT) 105 toolbutton = gtk.ToolButton(gtk.STOCK_PRINT_REPORT)
106 toolbutton.set_label(_('Clean (lossless)')) 106 toolbutton.set_label(_('Clean'))
107 toolbutton.connect('clicked', self.__process_files, self.__mat_clean) 107 toolbutton.connect('clicked', self.__process_files, self.__mat_clean)
108 toolbutton.set_tooltip_text(_('Clean selected files without possible \ 108 toolbutton.set_tooltip_text(_('Clean selected files'))
109data loss'))
110 toolbar.add(toolbutton)
111
112 toolbutton = gtk.ToolButton(gtk.STOCK_PRINT_WARNING)
113 toolbutton.set_label(_('Clean (strict)'))
114 toolbutton.connect('clicked', self.__process_files,
115 self.__mat_clean_strict)
116 toolbutton.set_tooltip_text(_('Clean selected files with possible \
117data loss, but clean more efficiently'))
118 toolbar.add(toolbutton) 109 toolbar.add(toolbutton)
119 110
120 toolbutton = gtk.ToolButton(gtk.STOCK_FIND) 111 toolbutton = gtk.ToolButton(gtk.STOCK_FIND)
@@ -203,22 +194,11 @@ data loss, but clean more efficiently'))
203 picture = gtk.Image() 194 picture = gtk.Image()
204 picture.set_from_stock(gtk.STOCK_PRINT_REPORT, gtk.ICON_SIZE_MENU) 195 picture.set_from_stock(gtk.STOCK_PRINT_REPORT, gtk.ICON_SIZE_MENU)
205 item.set_image(picture) 196 item.set_image(picture)
206 item.set_label(_('Clean (lossless)')) 197 item.set_label(_('Clean'))
207 item.connect('activate', self.__process_files, self.__mat_clean) 198 item.connect('activate', self.__process_files, self.__mat_clean)
208 process_menu.append(item) 199 process_menu.append(item)
209 200
210 item = gtk.ImageMenuItem() 201 item = gtk.ImageMenuItem()
211 key, mod = gtk.accelerator_parse('<Control>S')
212 item.add_accelerator('activate', self.accelerator,
213 key, mod, gtk.ACCEL_VISIBLE)
214 picture = gtk.Image()
215 picture.set_from_stock(gtk.STOCK_PRINT_WARNING, gtk.ICON_SIZE_MENU)
216 item.set_image(picture)
217 item.set_label(_('Clean (strict)'))
218 item.connect('activate', self.__process_files, self.__mat_clean_strict)
219 process_menu.append(item)
220
221 item = gtk.ImageMenuItem()
222 key, mod = gtk.accelerator_parse('<Control>h') 202 key, mod = gtk.accelerator_parse('<Control>h')
223 item.add_accelerator('activate', self.accelerator, 203 item.add_accelerator('activate', self.accelerator,
224 key, mod, gtk.ACCEL_VISIBLE) 204 key, mod, gtk.ACCEL_VISIBLE)
@@ -276,7 +256,6 @@ data loss, but clean more efficiently'))
276 for root, dirs, files in os.walk(filename): 256 for root, dirs, files in os.walk(filename):
277 for item in files: 257 for item in files:
278 path_to_file = os.path.join(root, item) 258 path_to_file = os.path.join(root, item)
279
280 if self.__add_file_to_treeview(path_to_file): 259 if self.__add_file_to_treeview(path_to_file):
281 not_supported.append(item) 260 not_supported.append(item)
282 else: # filename is a regular file 261 else: # filename is a regular file
@@ -493,11 +472,11 @@ non-anonymised) file to output archive'))
493 ''' 472 '''
494 for line in iterator: # for each file in selection 473 for line in iterator: # for each file in selection
495 self.statusbar.push(0, _('Checking %s...') % self.liststore[line][1]) 474 self.statusbar.push(0, _('Checking %s...') % self.liststore[line][1])
496 if self.liststore[line][3] != _('Clean (strict)'): 475 if self.force is True or self.liststore[line][3] != _('Clean'):
497 if self.liststore[line][0].file.is_clean(): 476 if self.liststore[line][0].file.is_clean():
498 string = _('Clean (lossless)') 477 string = _('Clean')
499 else: 478 else:
500 string = _('dirty') 479 string = _('Dirty')
501 logging.info('%s is %s' % (self.liststore[line][1], string)) 480 logging.info('%s is %s' % (self.liststore[line][1], string))
502 self.liststore[line][3] = string 481 self.liststore[line][3] = string
503 yield True 482 yield True
@@ -509,33 +488,11 @@ non-anonymised) file to output archive'))
509 Clean selected elements 488 Clean selected elements
510 ''' 489 '''
511 for line in iterator: # for each file in selection 490 for line in iterator: # for each file in selection
512 logging.info('Cleaning (lossless) %s' % self.liststore[line][1]) 491 logging.info('Cleaning %s' % self.liststore[line][1])
513 self.statusbar.push(0, _('Cleaning %s...') % self.liststore[line][1])
514 if self.liststore[line][3] != _('Clean (strict)'):
515 # if the file is not already strict cleaned
516 if self.force or not self.liststore[line][0].file.is_clean():
517 if self.liststore[line][0].file.remove_all():
518 # if everything went fine
519 self.liststore[line][3] = _('Clean (lossless)')
520 if self.backup: # the backup copy state
521 self.liststore[line][4] = self.liststore[line][0].file.output
522 yield True
523 self.statusbar.push(0, _('Ready'))
524 yield False
525
526 def __mat_clean_strict(self, iterator):
527 '''
528 Clean selected elements (ugly way)
529 '''
530 for line in iterator: # for each file in selection
531 logging.info(_('Cleaning (strict) %s') % self.liststore[line][1])
532 self.statusbar.push(0, _('Cleaning %s...') % self.liststore[line][1]) 492 self.statusbar.push(0, _('Cleaning %s...') % self.liststore[line][1])
533 if self.liststore[line][3] != _('Clean (strict)'): 493 if self.force is True or self.liststore[line][3] != _('Clean'):
534 # if the file is not already strict cleaned 494 if self.liststore[line][0].file.remove_all():
535 if self.force or not self.liststore[line][0].file.is_clean(): 495 self.liststore[line][3] = _('Clean')
536 if self.liststore[line][0].file.remove_all_strict():
537 # if everything went fine
538 self.liststore[line][3] = _('Clean (strict)')
539 if self.backup: # the backup copy state 496 if self.backup: # the backup copy state
540 self.liststore[line][4] = self.liststore[line][0].file.output 497 self.liststore[line][4] = self.liststore[line][0].file.output
541 yield True 498 yield True
diff --git a/mat.1 b/mat.1
index 178aa64..3e266f9 100644
--- a/mat.1
+++ b/mat.1
@@ -42,9 +42,6 @@ Don't check if files are clean before cleaning
42\fB\-l\fR, \fB\-\-list\fR 42\fB\-l\fR, \fB\-\-list\fR
43List all supported fileformat 43List all supported fileformat
44.TP 44.TP
45\fB\-u\fR, \fB\-\-strict\fR
46Remove harmful meta, but loss can occure
47.TP
48\fB\-v\fR, \fB\-\-version\fR 45\fB\-v\fR, \fB\-\-version\fR
49Display version and exit 46Display version and exit
50 47