summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorjvoisin2011-07-26 14:06:38 +0200
committerjvoisin2011-07-26 14:06:38 +0200
commite62ae6a87f630cbd389cf1b75672b06cd56973c8 (patch)
tree5433e5bde0d0448795626190f8014c61b38ac1c5
parentf6e3d57173604dab7228c830e84415ead02e169b (diff)
Pyflakes and pep8 validation
-rwxr-xr-xcli.py27
-rw-r--r--gui.py74
-rw-r--r--lib/archive.py20
-rw-r--r--lib/audio.py4
-rw-r--r--lib/images.py4
-rw-r--r--lib/mat.py19
-rw-r--r--lib/misc.py48
-rw-r--r--lib/office.py24
-rw-r--r--lib/parser.py21
9 files changed, 151 insertions, 90 deletions
diff --git a/cli.py b/cli.py
index b9c8a5c..bfedbf6 100755
--- a/cli.py
+++ b/cli.py
@@ -10,10 +10,11 @@ import hachoir_core
10 10
11__version__ = '0.1' 11__version__ = '0.1'
12 12
13
13def parse(): 14def parse():
14 parser = optparse.OptionParser(usage='%prog [options] filename') 15 parser = optparse.OptionParser(usage='%prog [options] filename')
15 parser.add_option('--add2archive', '-a', action='store_true', default=False, 16 parser.add_option('--add2archive', '-a', action='store_true',
16 help='Add to outputed archive non-supported filetypes') 17 default=False, help='Add to outputed archive non-supported filetypes')
17 parser.add_option('--backup', '-b', action='store_true', default=False, 18 parser.add_option('--backup', '-b', action='store_true', default=False,
18 help='Keep a backup copy') 19 help='Keep a backup copy')
19 parser.add_option('--check', '-c', action='store_true', default=False, 20 parser.add_option('--check', '-c', action='store_true', default=False,
@@ -31,15 +32,17 @@ def parse():
31 sys.exit(0) 32 sys.exit(0)
32 return values, arguments 33 return values, arguments
33 34
35
34def display_version(*args): 36def display_version(*args):
35 print('Metadata Anonymisation Toolkit version %s') % mat.__version__ 37 print('Metadata Anonymisation Toolkit version %s') % mat.__version__
36 print('CLI version %s') % __version__ 38 print('CLI version %s') % __version__
37 print('Hachoir version %s') % hachoir_core.__version__ 39 print('Hachoir version %s') % hachoir_core.__version__
38 sys.exit(0) 40 sys.exit(0)
39 41
42
40def list_meta(class_file, filename): 43def list_meta(class_file, filename):
41 ''' 44 '''
42 Print all the meta of 'filename' on stdout 45 Print all the meta of 'filename' on stdout
43 ''' 46 '''
44 print('[+] File %s :' % filename) 47 print('[+] File %s :' % filename)
45 if class_file.is_clean(): 48 if class_file.is_clean():
@@ -48,18 +51,20 @@ def list_meta(class_file, filename):
48 for key, value in class_file.get_meta().iteritems(): 51 for key, value in class_file.get_meta().iteritems():
49 print(key + ' : ' + str(value)) 52 print(key + ' : ' + str(value))
50 53
54
51def is_clean(class_file, filename): 55def is_clean(class_file, filename):
52 ''' 56 '''
53 Say if 'filename' is clean or not 57 Say if 'filename' is clean or not
54 ''' 58 '''
55 if class_file.is_clean(): 59 if class_file.is_clean():
56 print('[+] %s is clean' % filename) 60 print('[+] %s is clean' % filename)
57 else: 61 else:
58 print('[+] %s is not clean' % filename) 62 print('[+] %s is not clean' % filename)
59 63
64
60def clean_meta(class_file, filename): 65def clean_meta(class_file, filename):
61 ''' 66 '''
62 Clean the file 'filename' 67 Clean the file 'filename'
63 ''' 68 '''
64 print('[+] Cleaning %s' % filename) 69 print('[+] Cleaning %s' % filename)
65 if class_file.is_clean(): 70 if class_file.is_clean():
@@ -68,9 +73,10 @@ def clean_meta(class_file, filename):
68 class_file.remove_all() 73 class_file.remove_all()
69 print('%s cleaned !' % filename) 74 print('%s cleaned !' % filename)
70 75
76
71def clean_meta_ugly(class_file, filename): 77def clean_meta_ugly(class_file, filename):
72 ''' 78 '''
73 Clean the file 'filename', ugly way 79 Clean the file 'filename', ugly way
74 ''' 80 '''
75 print('[+] Cleaning %s' % filename) 81 print('[+] Cleaning %s' % filename)
76 if class_file.is_clean(): 82 if class_file.is_clean():
@@ -79,17 +85,18 @@ def clean_meta_ugly(class_file, filename):
79 class_file.remove_all_ugly() 85 class_file.remove_all_ugly()
80 print('%s cleaned' % filename) 86 print('%s cleaned' % filename)
81 87
88
82def main(): 89def main():
83 args, filenames = parse() 90 args, filenames = parse()
84 91
85 #func receive the function correponding to the options given as parameters 92 #func receive the function correponding to the options given as parameters
86 if args.display is True: #only print metadatas 93 if args.display is True: # only print metadatas
87 func = list_meta 94 func = list_meta
88 elif args.check is True: #only check if the file is clean 95 elif args.check is True: # only check if the file is clean
89 func = is_clean 96 func = is_clean
90 elif args.ugly is True: #destructive anonymisation method 97 elif args.ugly is True: # destructive anonymisation method
91 func = clean_meta_ugly 98 func = clean_meta_ugly
92 else: #clean the file 99 else: # clean the file
93 func = clean_meta 100 func = clean_meta
94 101
95 for filename in filenames: 102 for filename in filenames:
diff --git a/gui.py b/gui.py
index 978bd4b..550010a 100644
--- a/gui.py
+++ b/gui.py
@@ -1,21 +1,21 @@
1#!/usr/bin/env python 1#!/usr/bin/env python
2 2
3from gi.repository import Gtk, GObject, Gdk 3from gi.repository import Gtk, GObject
4import os 4import os
5import glob
6import logging 5import logging
7from lib import mat 6from lib import mat
8 7
9__version__ = '0.1' 8__version__ = '0.1'
10__author__ = 'jvoisin' 9__author__ = 'jvoisin'
11 10
12logging.basicConfig(level = mat.LOGGING_LEVEL) 11logging.basicConfig(level=mat.LOGGING_LEVEL)
13 12
14SUPPORTED = (('image/png', 'image/jpeg', 'image/gif', 13SUPPORTED = (('image/png', 'image/jpeg', 'image/gif',
15 'misc/pdf'), 14 'misc/pdf'),
16 ('*.jpg', '*.jpeg', '*.png', '*.bmp', '*.pdf', 15 ('*.jpg', '*.jpeg', '*.png', '*.bmp', '*.pdf',
17 '*.tar', '*.tar.bz2', '*.tar.gz', '*.mp3')) 16 '*.tar', '*.tar.bz2', '*.tar.gz', '*.mp3'))
18 17
18
19class cfile(GObject.GObject): 19class cfile(GObject.GObject):
20 ''' 20 '''
21 Contain the class-file of the file "path" 21 Contain the class-file of the file "path"
@@ -29,6 +29,7 @@ class cfile(GObject.GObject):
29 except: 29 except:
30 self.file = None 30 self.file = None
31 31
32
32class ListStoreApp: 33class ListStoreApp:
33 ''' 34 '''
34 Main GUI class 35 Main GUI class
@@ -40,7 +41,8 @@ class ListStoreApp:
40 self.add2archive = True 41 self.add2archive = True
41 42
42 self.window = Gtk.Window() 43 self.window = Gtk.Window()
43 self.window.set_title('Metadata Anonymisation Toolkit %s' % __version__) 44 self.window.set_title('Metadata Anonymisation Toolkit %s' %
45 __version__)
44 self.window.connect('destroy', Gtk.main_quit) 46 self.window.connect('destroy', Gtk.main_quit)
45 self.window.set_default_size(800, 600) 47 self.window.set_default_size(800, 600)
46 48
@@ -55,13 +57,12 @@ class ListStoreApp:
55 vbox.pack_start(content, True, True, 0) 57 vbox.pack_start(content, True, True, 0)
56 58
57 #parser.class - name - type - cleaned 59 #parser.class - name - type - cleaned
58 self.liststore= Gtk.ListStore(cfile ,str, str, str) 60 self.liststore= Gtk.ListStore(cfile, str, str, str)
59 61
60 treeview = Gtk.TreeView(model=self.liststore) 62 treeview = Gtk.TreeView(model=self.liststore)
61 treeview.set_search_column(1) #name column is searchable 63 treeview.set_search_column(1) # name column is searchable
62 treeview.set_rules_hint(True) #alternate colors for rows 64 treeview.set_rules_hint(True) # alternate colors for rows
63 treeview.set_rubber_banding(True) #mouse selection 65 treeview.set_rubber_banding(True) # mouse selection
64 treeview.drag_dest_set(Gtk.DestDefaults.ALL, None, Gdk.DragAction.COPY)
65 self.add_columns(treeview) 66 self.add_columns(treeview)
66 self.selection = treeview.get_selection() 67 self.selection = treeview.get_selection()
67 self.selection.set_mode(Gtk.SelectionMode.MULTIPLE) 68 self.selection.set_mode(Gtk.SelectionMode.MULTIPLE)
@@ -80,12 +81,12 @@ class ListStoreApp:
80 ''' 81 '''
81 toolbar = Gtk.Toolbar() 82 toolbar = Gtk.Toolbar()
82 83
83 toolbutton = Gtk.ToolButton(label = 'Add', stock_id=Gtk.STOCK_ADD) 84 toolbutton = Gtk.ToolButton(label='Add', stock_id=Gtk.STOCK_ADD)
84 toolbutton.connect('clicked', self.add_files) 85 toolbutton.connect('clicked', self.add_files)
85 toolbutton.set_tooltip_text('Add files') 86 toolbutton.set_tooltip_text('Add files')
86 toolbar.add(toolbutton) 87 toolbar.add(toolbutton)
87 88
88 toolbutton = Gtk.ToolButton(label = 'Clean', 89 toolbutton = Gtk.ToolButton(label='Clean',
89 stock_id=Gtk.STOCK_PRINT_REPORT) 90 stock_id=Gtk.STOCK_PRINT_REPORT)
90 toolbutton.connect('clicked', self.mat_clean) 91 toolbutton.connect('clicked', self.mat_clean)
91 toolbutton.set_tooltip_text('Clean selected files without data loss') 92 toolbutton.set_tooltip_text('Clean selected files without data loss')
@@ -93,7 +94,8 @@ class ListStoreApp:
93 94
94 toolbutton = Gtk.ToolButton(label='Brute Clean', 95 toolbutton = Gtk.ToolButton(label='Brute Clean',
95 stock_id=Gtk.STOCK_PRINT_WARNING) 96 stock_id=Gtk.STOCK_PRINT_WARNING)
96 toolbutton.set_tooltip_text('Clean selected files with possible data loss') 97 toolbutton.set_tooltip_text('Clean selected files with possible data \
98 loss')
97 toolbar.add(toolbutton) 99 toolbar.add(toolbutton)
98 100
99 toolbutton = Gtk.ToolButton(label='Check', stock_id=Gtk.STOCK_FIND) 101 toolbutton = Gtk.ToolButton(label='Check', stock_id=Gtk.STOCK_FIND)
@@ -117,8 +119,8 @@ class ListStoreApp:
117 119
118 for i, j in enumerate(colname): 120 for i, j in enumerate(colname):
119 filenameColumn = Gtk.CellRendererText() 121 filenameColumn = Gtk.CellRendererText()
120 column = Gtk.TreeViewColumn(j, filenameColumn, text=i+1) 122 column = Gtk.TreeViewColumn(j, filenameColumn, text=i + 1)
121 column.set_sort_column_id(i+1) 123 column.set_sort_column_id(i + 1)
122 treeview.append_column(column) 124 treeview.append_column(column)
123 125
124 def create_menu_item(self, name, func, menu, pix): 126 def create_menu_item(self, name, func, menu, pix):
@@ -156,8 +158,8 @@ class ListStoreApp:
156 Gtk.STOCK_QUIT) 158 Gtk.STOCK_QUIT)
157 159
158 edit_menu = self.create_sub_menu('Edit', menubar) 160 edit_menu = self.create_sub_menu('Edit', menubar)
159 self.create_menu_item('Clear the filelist', self.clear_model, edit_menu, 161 self.create_menu_item('Clear the filelist', self.clear_model,
160 Gtk.STOCK_REMOVE) 162 edit_menu, Gtk.STOCK_REMOVE)
161 self.create_menu_item('Preferences', self.preferences, edit_menu, 163 self.create_menu_item('Preferences', self.preferences, edit_menu,
162 Gtk.STOCK_PREFERENCES) 164 Gtk.STOCK_PREFERENCES)
163 165
@@ -181,9 +183,9 @@ class ListStoreApp:
181 ''' 183 '''
182 filter = Gtk.FileFilter() 184 filter = Gtk.FileFilter()
183 filter.set_name('Supported files') 185 filter.set_name('Supported files')
184 for item in SUPPORTED[0]: #add by mime 186 for item in SUPPORTED[0]: # add by mime
185 filter.add_mime_type(item) 187 filter.add_mime_type(item)
186 for item in SUPPORTED[1]: #add by extension 188 for item in SUPPORTED[1]: # add by extension
187 filter.add_pattern(item) 189 filter.add_pattern(item)
188 return filter 190 return filter
189 191
@@ -195,8 +197,7 @@ class ListStoreApp:
195 title='Choose files', 197 title='Choose files',
196 parent=None, 198 parent=None,
197 action=Gtk.FileChooserAction.OPEN, 199 action=Gtk.FileChooserAction.OPEN,
198 buttons=(Gtk.STOCK_OK, 0, Gtk.STOCK_CANCEL, 1) 200 buttons=(Gtk.STOCK_OK, 0, Gtk.STOCK_CANCEL, 1))
199 )
200 chooser.set_default_response(0) 201 chooser.set_default_response(0)
201 chooser.set_select_multiple(True) 202 chooser.set_select_multiple(True)
202 203
@@ -208,15 +209,15 @@ class ListStoreApp:
208 209
209 response = chooser.run() 210 response = chooser.run()
210 211
211 if response is 0: #Gtk.STOCK_OK 212 if response is 0: # Gtk.STOCK_OK
212 filenames = chooser.get_filenames() 213 filenames = chooser.get_filenames()
213 chooser.destroy() 214 chooser.destroy()
214 for item in filenames: 215 for item in filenames:
215 if os.path.isdir(item): #directory 216 if os.path.isdir(item): # directory
216 for root, dirs, files in os.walk(item): 217 for root, dirs, files in os.walk(item):
217 for name in files: 218 for name in files:
218 self.populate(os.path.join(root, name)) 219 self.populate(os.path.join(root, name))
219 else: #regular file 220 else: # regular file
220 self.populate(item) 221 self.populate(item)
221 chooser.destroy() 222 chooser.destroy()
222 223
@@ -226,7 +227,8 @@ class ListStoreApp:
226 ''' 227 '''
227 cf = cfile(item, self.backup, self.add2archive) 228 cf = cfile(item, self.backup, self.add2archive)
228 if cf.file is not None: 229 if cf.file is not None:
229 self.liststore.append([cf, cf.file.filename, cf.file.mime,'unknow']) 230 self.liststore.append([cf, cf.file.filename,
231 cf.file.mime, 'unknow'])
230 232
231 def about(self, button=None): 233 def about(self, button=None):
232 w = Gtk.AboutDialog() 234 w = Gtk.AboutDialog()
@@ -235,7 +237,7 @@ class ListStoreApp:
235 w.set_comments('This software was coded during the GSoC 2011') 237 w.set_comments('This software was coded during the GSoC 2011')
236 w.set_website('https://gitweb.torproject.org/user/jvoisin/mat.git') 238 w.set_website('https://gitweb.torproject.org/user/jvoisin/mat.git')
237 w.set_website_label('Website') 239 w.set_website_label('Website')
238 w.set_authors(['Julien (jvoisin) Voisin',]) 240 w.set_authors(['Julien (jvoisin) Voisin', ])
239 w.set_program_name('Metadata Anonymistion Toolkit') 241 w.set_program_name('Metadata Anonymistion Toolkit')
240 click = w.run() 242 click = w.run()
241 if click: 243 if click:
@@ -250,29 +252,30 @@ class ListStoreApp:
250 hbox = Gtk.HBox() 252 hbox = Gtk.HBox()
251 content_area.pack_start(hbox, False, False, 0) 253 content_area.pack_start(hbox, False, False, 0)
252 icon = Gtk.Image(stock=Gtk.STOCK_PREFERENCES, 254 icon = Gtk.Image(stock=Gtk.STOCK_PREFERENCES,
253 icon_size=Gtk.IconSize.DIALOG)#the little picture on the left 255 icon_size=Gtk.IconSize.DIALOG) # the little picture on the left
254 256
255 hbox.pack_start(icon, False, False, 0) 257 hbox.pack_start(icon, False, False, 0)
256 258
257 table = Gtk.Table(3, 2, False)#nb rows, nb lines 259 table = Gtk.Table(3, 2, False) # nb rows, nb lines
258 table.set_row_spacings(4) 260 table.set_row_spacings(4)
259 table.set_col_spacings(4) 261 table.set_col_spacings(4)
260 hbox.pack_start(table, True, True, 0) 262 hbox.pack_start(table, True, True, 0)
261 263
262 force = Gtk.CheckButton('Force Clean', False) 264 force = Gtk.CheckButton('Force Clean', False)
263 force.connect('toggled', self.invert, 'force') 265 force.connect('toggled', self.invert, 'force')
264 force.set_tooltip_text('Do not check if already clean before cleaning.') 266 force.set_tooltip_text('Do not check if already clean before cleaning')
265 force.set_active(self.force) 267 force.set_active(self.force)
266 268
267 backup = Gtk.CheckButton('Backup', False) 269 backup = Gtk.CheckButton('Backup', False)
268 backup.connect('toggled', self.invert, 'backup') 270 backup.connect('toggled', self.invert, 'backup')
269 backup.set_tooltip_text('Keep a backup copy.') 271 backup.set_tooltip_text('Keep a backup copy')
270 backup.set_active(self.backup) 272 backup.set_active(self.backup)
271 273
272 add2archive = Gtk.CheckButton('Add unsupported file to archives', False) 274 add2archive = Gtk.CheckButton('Add unsupported file to archives',
275 False)
273 add2archive.connect('toggled', self.invert, 'add2archive') 276 add2archive.connect('toggled', self.invert, 'add2archive')
274 add2archive.set_tooltip_text('Add non-supported (and so non-anonymised)\ 277 add2archive.set_tooltip_text('Add non-supported (and so \
275 file to outputed archive.') 278non-anonymised) file to outputed archive')
276 add2archive.set_active(self.add2archive) 279 add2archive.set_active(self.add2archive)
277 280
278 table.attach_defaults(force, 0, 1, 0, 1) 281 table.attach_defaults(force, 0, 1, 0, 1)
@@ -281,10 +284,10 @@ class ListStoreApp:
281 284
282 hbox.show_all() 285 hbox.show_all()
283 response = dialog.run() 286 response = dialog.run()
284 if response is 0:#Gtk.STOCK_OK 287 if response is 0: # Gtk.STOCK_OK
285 dialog.destroy() 288 dialog.destroy()
286 289
287 def invert(self, button, name): #Still not better :/ 290 def invert(self, button, name): # still not better :/
288 if name is 'force': 291 if name is 'force':
289 self.force = not self.force 292 self.force = not self.force
290 elif name is 'ugly': 293 elif name is 'ugly':
@@ -338,8 +341,9 @@ class ListStoreApp:
338 self.liststore[i][0].file.remove_all_ugly() 341 self.liststore[i][0].file.remove_all_ugly()
339 self.liststore[i][3] = 'clean' 342 self.liststore[i][3] = 'clean'
340 343
344
341def main(): 345def main():
342 app = ListStoreApp() 346 ListStoreApp()
343 Gtk.main() 347 Gtk.main()
344 348
345if __name__ == '__main__': 349if __name__ == '__main__':
diff --git a/lib/archive.py b/lib/archive.py
index f22af39..f11506a 100644
--- a/lib/archive.py
+++ b/lib/archive.py
@@ -9,11 +9,13 @@ import tempfile
9import parser 9import parser
10import mat 10import mat
11 11
12
12class GenericArchiveStripper(parser.Generic_parser): 13class GenericArchiveStripper(parser.Generic_parser):
13 ''' 14 '''
14 Represent a generic archive 15 Represent a generic archive
15 ''' 16 '''
16 def __init__(self, realname, filename, parser, editor, backup, add2archive): 17 def __init__(self, realname, filename, parser, editor, backup,
18 add2archive):
17 super(GenericArchiveStripper, self).__init__(realname, 19 super(GenericArchiveStripper, self).__init__(realname,
18 filename, parser, editor, backup, add2archive) 20 filename, parser, editor, backup, add2archive)
19 self.compression = '' 21 self.compression = ''
@@ -32,6 +34,7 @@ class GenericArchiveStripper(parser.Generic_parser):
32 def remove_all_ugly(self): 34 def remove_all_ugly(self):
33 self._remove_all('ugly') 35 self._remove_all('ugly')
34 36
37
35class ZipStripper(GenericArchiveStripper): 38class ZipStripper(GenericArchiveStripper):
36 ''' 39 '''
37 Represent a zip file 40 Represent a zip file
@@ -94,7 +97,6 @@ harmless format' % item.filename)
94 zipin.close() 97 zipin.close()
95 return metadata 98 return metadata
96 99
97
98 def _remove_all(self, method): 100 def _remove_all(self, method):
99 ''' 101 '''
100 So far, the zipfile module does not allow to write a ZipInfo 102 So far, the zipfile module does not allow to write a ZipInfo
@@ -150,7 +152,7 @@ class TarStripper(GenericArchiveStripper):
150 for item in tarin.getmembers(): 152 for item in tarin.getmembers():
151 tarin.extract(item, self.tempdir) 153 tarin.extract(item, self.tempdir)
152 name = os.path.join(self.tempdir, item.name) 154 name = os.path.join(self.tempdir, item.name)
153 if item.type is '0': #is item a regular file ? 155 if item.type is '0': # is item a regular file ?
154 #no backup file 156 #no backup file
155 try: 157 try:
156 cfile = mat.create_class_file(name, False, 158 cfile = mat.create_class_file(name, False,
@@ -164,7 +166,7 @@ class TarStripper(GenericArchiveStripper):
164 logging.info('%s\' format is not supported' % 166 logging.info('%s\' format is not supported' %
165 item.name) 167 item.name)
166 if self.add2archive: 168 if self.add2archive:
167 tarout.add(name, item.name,filter=self._remove) 169 tarout.add(name, item.name, filter=self._remove)
168 mat.secure_remove(name) 170 mat.secure_remove(name)
169 tarin.close() 171 tarin.close()
170 tarout.close() 172 tarout.close()
@@ -194,7 +196,7 @@ class TarStripper(GenericArchiveStripper):
194 return False 196 return False
195 tarin.extract(item, self.tempdir) 197 tarin.extract(item, self.tempdir)
196 name = os.path.join(self.tempdir, item.name) 198 name = os.path.join(self.tempdir, item.name)
197 if item.type is '0': #is item a regular file ? 199 if item.type is '0': # is item a regular file ?
198 #no backup file 200 #no backup file
199 try: 201 try:
200 class_file = mat.create_class_file(name, 202 class_file = mat.create_class_file(name,
@@ -216,7 +218,7 @@ class TarStripper(GenericArchiveStripper):
216 metadata = {} 218 metadata = {}
217 for current_file in tarin.getmembers(): 219 for current_file in tarin.getmembers():
218 if current_file.type is '0': 220 if current_file.type is '0':
219 if not self.is_file_clean(current_file):#if there is meta 221 if not self.is_file_clean(current_file): # if there is meta
220 current_meta = {} 222 current_meta = {}
221 current_meta['mtime'] = current_file.mtime 223 current_meta['mtime'] = current_file.mtime
222 current_meta['uid'] = current_file.uid 224 current_meta['uid'] = current_file.uid
@@ -229,14 +231,16 @@ class TarStripper(GenericArchiveStripper):
229 231
230 232
231class GzipStripper(TarStripper): 233class GzipStripper(TarStripper):
232 def __init__(self, realname, filename, parser, editor, backup, add2archive): 234 def __init__(self, realname, filename, parser, editor, backup,
235 add2archive):
233 super(GzipStripper, self).__init__(realname, 236 super(GzipStripper, self).__init__(realname,
234 filename, parser, editor, backup, add2archive) 237 filename, parser, editor, backup, add2archive)
235 self.compression = ':gz' 238 self.compression = ':gz'
236 239
237 240
238class Bzip2Stripper(TarStripper): 241class Bzip2Stripper(TarStripper):
239 def __init__(self, realname, filename, parser, editor, backup, add2archive): 242 def __init__(self, realname, filename, parser, editor, backup,
243 add2archive):
240 super(Bzip2Stripper, self).__init__(realname, 244 super(Bzip2Stripper, self).__init__(realname,
241 filename, parser, editor, backup, add2archive) 245 filename, parser, editor, backup, add2archive)
242 self.compression = ':bz2' 246 self.compression = ':bz2'
diff --git a/lib/audio.py b/lib/audio.py
index 6d653bc..35d4fde 100644
--- a/lib/audio.py
+++ b/lib/audio.py
@@ -1,6 +1,10 @@
1import parser 1import parser
2 2
3
3class MpegAudioStripper(parser.Generic_parser): 4class MpegAudioStripper(parser.Generic_parser):
5 '''
6 mpeg audio file (mp3, ...)
7 '''
4 def _should_remove(self, field): 8 def _should_remove(self, field):
5 if field.name in ("id3v1", "id3v2"): 9 if field.name in ("id3v1", "id3v2"):
6 return True 10 return True
diff --git a/lib/images.py b/lib/images.py
index 4441b70..bab0bfb 100644
--- a/lib/images.py
+++ b/lib/images.py
@@ -1,8 +1,5 @@
1import parser 1import parser
2 2
3class BmpStripper(parser.Generic_parser):
4 def _should_remove(self, field):
5 return False
6 3
7class JpegStripper(parser.Generic_parser): 4class JpegStripper(parser.Generic_parser):
8 def _should_remove(self, field): 5 def _should_remove(self, field):
@@ -13,6 +10,7 @@ class JpegStripper(parser.Generic_parser):
13 else: 10 else:
14 return False 11 return False
15 12
13
16class PngStripper(parser.Generic_parser): 14class PngStripper(parser.Generic_parser):
17 def _should_remove(self, field): 15 def _should_remove(self, field):
18 if field.name.startswith("text["): 16 if field.name.startswith("text["):
diff --git a/lib/mat.py b/lib/mat.py
index ccf653f..e4371ce 100644
--- a/lib/mat.py
+++ b/lib/mat.py
@@ -23,12 +23,11 @@ __author__ = 'jvoisin'
23 23
24LOGGING_LEVEL = logging.DEBUG 24LOGGING_LEVEL = logging.DEBUG
25 25
26logging.basicConfig(level = LOGGING_LEVEL) 26logging.basicConfig(level=LOGGING_LEVEL)
27 27
28strippers = { 28strippers = {
29 hachoir_parser.image.JpegFile: images.JpegStripper, 29 hachoir_parser.image.JpegFile: images.JpegStripper,
30 hachoir_parser.image.PngFile: images.PngStripper, 30 hachoir_parser.image.PngFile: images.PngStripper,
31 hachoir_parser.image.bmp.BmpFile: images.BmpStripper,
32 hachoir_parser.audio.MpegAudioFile: audio.MpegAudioStripper, 31 hachoir_parser.audio.MpegAudioFile: audio.MpegAudioStripper,
33 hachoir_parser.misc.PDFDocument: office.PdfStripper, 32 hachoir_parser.misc.PDFDocument: office.PdfStripper,
34 hachoir_parser.archive.TarFile: archive.TarStripper, 33 hachoir_parser.archive.TarFile: archive.TarStripper,
@@ -37,6 +36,7 @@ strippers = {
37 hachoir_parser.archive.zip.ZipFile: archive.ZipStripper, 36 hachoir_parser.archive.zip.ZipFile: archive.ZipStripper,
38} 37}
39 38
39
40def secure_remove(filename): 40def secure_remove(filename):
41 ''' 41 '''
42 securely remove the file 42 securely remove the file
@@ -52,10 +52,11 @@ def is_secure(filename):
52 Prevent shell injection 52 Prevent shell injection
53 ''' 53 '''
54 54
55 if not(os.path.isfile(filename)): #check if the file exist 55 if not(os.path.isfile(filename)): # check if the file exist
56 logging.error('Error: %s is not a valid file' % filename) 56 logging.error('Error: %s is not a valid file' % filename)
57 return False 57 return False
58 58
59
59def create_class_file(name, backup, add2archive): 60def create_class_file(name, backup, add2archive):
60 ''' 61 '''
61 return a $FILETYPEStripper() class, 62 return a $FILETYPEStripper() class,
@@ -68,7 +69,7 @@ def create_class_file(name, backup, add2archive):
68 realname = name 69 realname = name
69 try: 70 try:
70 filename = hachoir_core.cmd_line.unicodeFilename(name) 71 filename = hachoir_core.cmd_line.unicodeFilename(name)
71 except TypeError:# get rid of "TypeError: decoding Unicode is not supported" 72 except TypeError: # get rid of "decoding Unicode is not supported"
72 filename = name 73 filename = name
73 parser = hachoir_parser.createParser(filename) 74 parser = hachoir_parser.createParser(filename)
74 if not parser: 75 if not parser:
@@ -88,22 +89,22 @@ def create_class_file(name, backup, add2archive):
88 logging.info('Don\'t have stripper for format %s' % editor.description) 89 logging.info('Don\'t have stripper for format %s' % editor.description)
89 return 90 return
90 91
91 if editor.input.__class__ == hachoir_parser.misc.PDFDocument:#pdf 92 if editor.input.__class__ == hachoir_parser.misc.PDFDocument: # pdf
92 return stripper_class(filename, realname, backup) 93 return stripper_class(filename, realname, backup)
93 94
94 elif editor.input.__class__ == hachoir_parser.archive.zip.ZipFile: 95 elif editor.input.__class__ == hachoir_parser.archive.zip.ZipFile:
95 #zip based format 96 #zip based format
96 mime = mimetypes.guess_type(filename)[0] 97 mime = mimetypes.guess_type(filename)[0]
97 try:#Ugly workaround, cleaning open document delete mime (wtf?) 98 try: # ugly workaround, cleaning open document delete mime (wtf?)
98 if mime.startswith('application/vnd.oasis.opendocument'): 99 if mime.startswith('application/vnd.oasis.opendocument'):
99 return office.OpenDocumentStripper(realname, filename, parser, 100 return office.OpenDocumentStripper(realname, filename, parser,
100 editor, backup, add2archive) 101 editor, backup, add2archive)
101 else:#normal zip 102 else: # normal zip
102 return stripper_class(realname, filename, parser, editor, 103 return stripper_class(realname, filename, parser, editor,
103 backup, add2archive) 104 backup, add2archive)
104 except:#normal zip file 105 except: # normal zip
105 return stripper_class(realname, filename, parser, editor, backup, 106 return stripper_class(realname, filename, parser, editor, backup,
106 add2archive) 107 add2archive)
107 else:#normal handling 108 else: # normal handling
108 return stripper_class(realname, filename, parser, editor, backup, 109 return stripper_class(realname, filename, parser, editor, backup,
109 add2archive) 110 add2archive)
diff --git a/lib/misc.py b/lib/misc.py
new file mode 100644
index 0000000..ce14313
--- /dev/null
+++ b/lib/misc.py
@@ -0,0 +1,48 @@
1import hachoir_core
2import parser
3
4
5class TorrentStripper(parser.Generic_parser):
6 '''
7 A torrent file looks like:
8 -root
9 -start
10 -announce
11 -announce-list
12 -comment
13 -created_by
14 -creation_date
15 -encoding
16 -info
17 -end
18 '''
19 def remove_all(self):
20 for field in self.editor['root']:
21 if self._should_remove(field):
22 #FIXME : hachoir does not support torrent metadata editing :<
23 del self.editor['/root/' + field.name]
24 hachoir_core.field.writeIntoFile(self.editor,
25 self.filename + parser.POSTFIX)
26 self.do_backup()
27
28 def is_clean(self):
29 for field in self.editor['root']:
30 if self._should_remove(field):
31 return False
32 return True
33
34 def get_meta(self):
35 metadata = {}
36 for field in self.editor['root']:
37 if self._should_remove(field):
38 try: # FIXME
39 metadata[field.name] = field.value
40 except:
41 metadata[field.name] = 'harmful content'
42 return metadata
43
44 def _should_remove(self, field):
45 if field.name in ('comment', 'created_by', 'creation_date', 'info'):
46 return True
47 else:
48 return False
diff --git a/lib/office.py b/lib/office.py
index 27677d2..432bc0b 100644
--- a/lib/office.py
+++ b/lib/office.py
@@ -5,17 +5,16 @@ import tempfile
5import glob 5import glob
6import logging 6import logging
7import zipfile 7import zipfile
8import shutil
9import re 8import re
10from xml.etree import ElementTree 9from xml.etree import ElementTree
11 10
12import hachoir_core
13 11
14import pdfrw 12import pdfrw
15import mat 13import mat
16import parser 14import parser
17import archive 15import archive
18 16
17
19class OpenDocumentStripper(archive.GenericArchiveStripper): 18class OpenDocumentStripper(archive.GenericArchiveStripper):
20 ''' 19 '''
21 An open document file is a zip, with xml file into. 20 An open document file is a zip, with xml file into.
@@ -32,11 +31,10 @@ class OpenDocumentStripper(archive.GenericArchiveStripper):
32 for node in tree.iter(): 31 for node in tree.iter():
33 key = re.sub('{.*}', '', node.tag) 32 key = re.sub('{.*}', '', node.tag)
34 metadata[key] = node.text 33 metadata[key] = node.text
35 except KeyError:#no meta.xml file found 34 except KeyError: # no meta.xml file found
36 logging.debug('%s has no opendocument metadata' % self.filename) 35 logging.debug('%s has no opendocument metadata' % self.filename)
37 return metadata 36 return metadata
38 37
39
40 def _remove_all(self, method): 38 def _remove_all(self, method):
41 ''' 39 '''
42 FIXME ? 40 FIXME ?
@@ -50,7 +48,7 @@ class OpenDocumentStripper(archive.GenericArchiveStripper):
50 name = os.path.join(self.tempdir, item) 48 name = os.path.join(self.tempdir, item)
51 if item.endswith('.xml') or item == 'mimetype': 49 if item.endswith('.xml') or item == 'mimetype':
52 #keep .xml files, and the "manifest" file 50 #keep .xml files, and the "manifest" file
53 if item != 'meta.xml':#contains the metadata 51 if item != 'meta.xml': # contains the metadata
54 zipin.extract(item, self.tempdir) 52 zipin.extract(item, self.tempdir)
55 zipout.write(name, item) 53 zipout.write(name, item)
56 mat.secure_remove(name) 54 mat.secure_remove(name)
@@ -73,7 +71,7 @@ class OpenDocumentStripper(archive.GenericArchiveStripper):
73 self.filename)) 71 self.filename))
74 zipout.write(name, item) 72 zipout.write(name, item)
75 except: 73 except:
76 logging.info('%s\' fileformat is not supported' % item) 74 logging.info('%s\' fileformat is not supported' % item)
77 if self.add2archive: 75 if self.add2archive:
78 zipout.write(name, item) 76 zipout.write(name, item)
79 mat.secure_remove(name) 77 mat.secure_remove(name)
@@ -88,7 +86,7 @@ class OpenDocumentStripper(archive.GenericArchiveStripper):
88 try: 86 try:
89 zipin.getinfo('meta.xml') 87 zipin.getinfo('meta.xml')
90 return False 88 return False
91 except KeyError:#no meta.xml in the file 89 except KeyError: # no meta.xml in the file
92 zipin.close() 90 zipin.close()
93 czf = archive.ZipStripper(self.realname, self.filename, 91 czf = archive.ZipStripper(self.realname, self.filename,
94 self.parser, self.editor, self.backup, self.add2archive) 92 self.parser, self.editor, self.backup, self.add2archive)
@@ -104,7 +102,7 @@ class PdfStripper(parser.Generic_parser):
104 Represent a pdf file, with the help of pdfrw 102 Represent a pdf file, with the help of pdfrw
105 ''' 103 '''
106 def __init__(self, filename, realname, backup): 104 def __init__(self, filename, realname, backup):
107 name, path = os.path.splitext(filename) 105 name, ext = os.path.splitext(filename)
108 self.output = name + '.cleaned' + ext 106 self.output = name + '.cleaned' + ext
109 self.filename = filename 107 self.filename = filename
110 self.backup = backup 108 self.backup = backup
@@ -137,7 +135,7 @@ class PdfStripper(parser.Generic_parser):
137 ''' 135 '''
138 _, self.tmpdir = tempfile.mkstemp() 136 _, self.tmpdir = tempfile.mkstemp()
139 subprocess.call(self.convert % (self.filename, self.tmpdir + 137 subprocess.call(self.convert % (self.filename, self.tmpdir +
140 'temp.jpg'), shell=True)#Convert pages to jpg 138 'temp.jpg'), shell=True) # Convert pages to jpg
141 139
142 for current_file in glob.glob(self.tmpdir + 'temp*'): 140 for current_file in glob.glob(self.tmpdir + 'temp*'):
143 #Clean every jpg image 141 #Clean every jpg image
@@ -145,18 +143,18 @@ class PdfStripper(parser.Generic_parser):
145 class_file.remove_all() 143 class_file.remove_all()
146 144
147 subprocess.call(self.convert % (self.tmpdir + 145 subprocess.call(self.convert % (self.tmpdir +
148 'temp.jpg*', self.output), shell=True)#Assemble jpg into pdf 146 'temp.jpg*', self.output), shell=True) # Assemble jpg into pdf
149 147
150 for current_file in glob.glob(self.tmpdir + 'temp*'): 148 for current_file in glob.glob(self.tmpdir + 'temp*'):
151 #remove jpg files 149 #remove jpg files
152 mat.secure_remove(current_file) 150 mat.secure_remove(current_file)
153 151
154 if self.backup is False: 152 if self.backup is False:
155 mat.secure_remove(self.filename) #remove the old file 153 mat.secure_remove(self.filename) # remove the old file
156 os.rename(self.output, self.filename)#rename the new 154 os.rename(self.output, self.filename) # rename the new
157 name = self.realname 155 name = self.realname
158 else: 156 else:
159 name = output_file 157 name = self.output
160 class_file = mat.create_class_file(name, False) 158 class_file = mat.create_class_file(name, False)
161 class_file.remove_all() 159 class_file.remove_all()
162 160
diff --git a/lib/parser.py b/lib/parser.py
index aa7e7f1..28e0849 100644
--- a/lib/parser.py
+++ b/lib/parser.py
@@ -2,27 +2,25 @@
2 Parent class of all parser 2 Parent class of all parser
3''' 3'''
4 4
5import hachoir_core.error 5import hachoir_core
6import hachoir_parser
7import hachoir_editor
8 6
9import sys
10import os 7import os
11import subprocess
12import mimetypes 8import mimetypes
13 9
14import mat 10import mat
15 11
16NOMETA = ('.txt', '.bmp', '.py', '.xml', '.rdf') 12NOMETA = ('.bmp', 'html', '.py', '.rdf', '.txt', '.xml')
13
17 14
18class Generic_parser(object): 15class Generic_parser(object):
19 def __init__(self, realname, filename, parser, editor, backup, add2archive): 16 def __init__(self, realname, filename, parser, editor, backup,
17 add2archive):
20 basename, ext = os.path.splitext(filename) 18 basename, ext = os.path.splitext(filename)
21 self.output = basename + '.cleaned' + ext 19 self.output = basename + '.cleaned' + ext
22 self.filename = filename #path + filename 20 self.filename = filename # path + filename
23 self.realname = realname #path + filename 21 self.realname = realname # path + filename
24 self.basename = os.path.basename(filename) #only filename 22 self.basename = os.path.basename(filename) # only filename
25 self.mime = mimetypes.guess_type(filename)[0] #mimetype 23 self.mime = mimetypes.guess_type(filename)[0] # mimetype
26 self.parser = parser 24 self.parser = parser
27 self.editor = editor 25 self.editor = editor
28 self.backup = backup 26 self.backup = backup
@@ -56,7 +54,6 @@ class Generic_parser(object):
56 ''' 54 '''
57 self.remove_all() 55 self.remove_all()
58 56
59
60 def _remove(self, field): 57 def _remove(self, field):
61 ''' 58 '''
62 Delete the given field 59 Delete the given field