summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorjvoisin2011-07-21 21:15:09 +0200
committerjvoisin2011-07-21 21:15:09 +0200
commitbb83090afcd12893c301ef14869e84beda43ccb9 (patch)
treec314f6bb95c0e3c50b885f3d95a014e2e8164ee9
parent54ca53de5e7df9e0c9bd9e25002386d346010594 (diff)
Add an option to include (or exclude) files of unsupported formats in the output archives (invasive patch, sorry about that).
-rwxr-xr-xcli.py5
-rw-r--r--gui.py20
-rw-r--r--lib/archive.py29
-rw-r--r--lib/mat.py11
-rw-r--r--lib/parser.py4
-rw-r--r--test/clitest.py4
-rw-r--r--test/libtest.py12
7 files changed, 56 insertions, 29 deletions
diff --git a/cli.py b/cli.py
index a19b8ce..b9c8a5c 100755
--- a/cli.py
+++ b/cli.py
@@ -12,6 +12,8 @@ __version__ = '0.1'
12 12
13def parse(): 13def parse():
14 parser = optparse.OptionParser(usage='%prog [options] filename') 14 parser = optparse.OptionParser(usage='%prog [options] filename')
15 parser.add_option('--add2archive', '-a', action='store_true', default=False,
16 help='Add to outputed archive non-supported filetypes')
15 parser.add_option('--backup', '-b', action='store_true', default=False, 17 parser.add_option('--backup', '-b', action='store_true', default=False,
16 help='Keep a backup copy') 18 help='Keep a backup copy')
17 parser.add_option('--check', '-c', action='store_true', default=False, 19 parser.add_option('--check', '-c', action='store_true', default=False,
@@ -91,7 +93,8 @@ def main():
91 func = clean_meta 93 func = clean_meta
92 94
93 for filename in filenames: 95 for filename in filenames:
94 class_file = mat.create_class_file(filename, args.backup) 96 class_file = mat.create_class_file(filename, args.backup,
97 args.add2archive)
95 if class_file is not None: 98 if class_file is not None:
96 func(class_file, filename) 99 func(class_file, filename)
97 100
diff --git a/gui.py b/gui.py
index 9294fcc..978bd4b 100644
--- a/gui.py
+++ b/gui.py
@@ -22,10 +22,10 @@ class cfile(GObject.GObject):
22 This class exist just to be "around" my parser.Generic_parser class, 22 This class exist just to be "around" my parser.Generic_parser class,
23 since Gtk.ListStore does not accept it. 23 since Gtk.ListStore does not accept it.
24 ''' 24 '''
25 def __init__(self, path, backup): 25 def __init__(self, path, backup, add2archive):
26 GObject.GObject.__init__(self) 26 GObject.GObject.__init__(self)
27 try: 27 try:
28 self.file = mat.create_class_file(path, backup) 28 self.file = mat.create_class_file(path, backup, add2archive)
29 except: 29 except:
30 self.file = None 30 self.file = None
31 31
@@ -37,6 +37,7 @@ class ListStoreApp:
37 #preferences 37 #preferences
38 self.backup = True 38 self.backup = True
39 self.force = False 39 self.force = False
40 self.add2archive = True
40 41
41 self.window = Gtk.Window() 42 self.window = Gtk.Window()
42 self.window.set_title('Metadata Anonymisation Toolkit %s' % __version__) 43 self.window.set_title('Metadata Anonymisation Toolkit %s' % __version__)
@@ -223,7 +224,7 @@ class ListStoreApp:
223 ''' 224 '''
224 Append selected files by add_file to the self.liststore 225 Append selected files by add_file to the self.liststore
225 ''' 226 '''
226 cf = cfile(item, self.backup) 227 cf = cfile(item, self.backup, self.add2archive)
227 if cf.file is not None: 228 if cf.file is not None:
228 self.liststore.append([cf, cf.file.filename, cf.file.mime,'unknow']) 229 self.liststore.append([cf, cf.file.filename, cf.file.mime,'unknow'])
229 230
@@ -253,7 +254,7 @@ class ListStoreApp:
253 254
254 hbox.pack_start(icon, False, False, 0) 255 hbox.pack_start(icon, False, False, 0)
255 256
256 table = Gtk.Table(2, 2, False)#nb rows, nb lines 257 table = Gtk.Table(3, 2, False)#nb rows, nb lines
257 table.set_row_spacings(4) 258 table.set_row_spacings(4)
258 table.set_col_spacings(4) 259 table.set_col_spacings(4)
259 hbox.pack_start(table, True, True, 0) 260 hbox.pack_start(table, True, True, 0)
@@ -268,8 +269,15 @@ class ListStoreApp:
268 backup.set_tooltip_text('Keep a backup copy.') 269 backup.set_tooltip_text('Keep a backup copy.')
269 backup.set_active(self.backup) 270 backup.set_active(self.backup)
270 271
272 add2archive = Gtk.CheckButton('Add unsupported file to archives', False)
273 add2archive.connect('toggled', self.invert, 'add2archive')
274 add2archive.set_tooltip_text('Add non-supported (and so non-anonymised)\
275 file to outputed archive.')
276 add2archive.set_active(self.add2archive)
277
271 table.attach_defaults(force, 0, 1, 0, 1) 278 table.attach_defaults(force, 0, 1, 0, 1)
272 table.attach_defaults(backup, 0, 1, 1, 2) 279 table.attach_defaults(backup, 0, 1, 1, 2)
280 table.attach_defaults(add2archive, 0, 1, 2, 3)
273 281
274 hbox.show_all() 282 hbox.show_all()
275 response = dialog.run() 283 response = dialog.run()
@@ -313,7 +321,7 @@ class ListStoreApp:
313 if self.force: 321 if self.force:
314 self.liststore[i][0].file.remove_all() 322 self.liststore[i][0].file.remove_all()
315 else: 323 else:
316 if not self.liststore[i][0].is_clean(): 324 if not self.liststore[i][0].file.is_clean():
317 self.liststore[i][0].file.remove_all() 325 self.liststore[i][0].file.remove_all()
318 self.liststore[i][3] = 'clean' 326 self.liststore[i][3] = 'clean'
319 327
@@ -326,7 +334,7 @@ class ListStoreApp:
326 if self.force: 334 if self.force:
327 self.liststore[i][0].file.remove_all_ugly() 335 self.liststore[i][0].file.remove_all_ugly()
328 else: 336 else:
329 if not self.liststore[i][0].is_clean(): 337 if not self.liststore[i][0].file.is_clean():
330 self.liststore[i][0].file.remove_all_ugly() 338 self.liststore[i][0].file.remove_all_ugly()
331 self.liststore[i][3] = 'clean' 339 self.liststore[i][3] = 'clean'
332 340
diff --git a/lib/archive.py b/lib/archive.py
index aa2a47f..c31fac6 100644
--- a/lib/archive.py
+++ b/lib/archive.py
@@ -13,11 +13,12 @@ class GenericArchiveStripper(parser.Generic_parser):
13 ''' 13 '''
14 Represent a generic archive 14 Represent a generic archive
15 ''' 15 '''
16 def __init__(self, realname, filename, parser, editor, backup): 16 def __init__(self, realname, filename, parser, editor, backup, add2archive):
17 super(GenericArchiveStripper, self).__init__(realname, 17 super(GenericArchiveStripper, self).__init__(realname,
18 filename, parser, editor, backup) 18 filename, parser, editor, backup, add2archive)
19 self.compression = '' 19 self.compression = ''
20 self.folder_list = [] 20 self.folder_list = []
21 self.add2archive = add2archive
21 22
22 def remove_folder(self): 23 def remove_folder(self):
23 [shutil.rmtree(folder) for folder in self.folder_list] 24 [shutil.rmtree(folder) for folder in self.folder_list]
@@ -49,13 +50,18 @@ class ZipStripper(GenericArchiveStripper):
49 zipin.extract(item) 50 zipin.extract(item)
50 if os.path.isfile(item.filename): 51 if os.path.isfile(item.filename):
51 try: 52 try:
52 cfile = mat.create_class_file(item.filename, False) 53 cfile = mat.create_class_file(item.filename, False,
54 self.add2archive)
53 cfile.remove_all() 55 cfile.remove_all()
54 logging.debug('Processing %s from %s' % (item.filename, 56 logging.debug('Processing %s from %s' % (item.filename,
55 self.filename)) 57 self.filename))
58 zipout.write(item.filename)
56 except: 59 except:
57 print('%s\' filefomart is not supported'%item.filename) 60 logging.info('%s\' filefomart is not supported' %
58 zipout.write(item.filename) 61 item.filename)
62 if self.add2archive:
63 zipout.write(item.filename)
64 mat.secure_remove(item.filename)
59 else: 65 else:
60 self.folder_list.insert(0, item.filename) 66 self.folder_list.insert(0, item.filename)
61 logging.info('%s treated' % self.filename) 67 logging.info('%s treated' % self.filename)
@@ -84,11 +90,15 @@ class TarStripper(GenericArchiveStripper):
84 if current_file.type is '0': #is current_file a regular file ? 90 if current_file.type is '0': #is current_file a regular file ?
85 #no backup file 91 #no backup file
86 try: 92 try:
87 cfile = mat.create_class_file(current_file.name, False) 93 cfile = mat.create_class_file(current_file.name, False,
94 self.add2archive)
88 cfile.remove_all() 95 cfile.remove_all()
96 tarout.add(current_file.name, filter=self._remove)
89 except: 97 except:
90 print('%s\' format is not supported'%current_file.name) 98 logging.info('%s\' format is not supported' %
91 tarout.add(current_file.name, filter=self._remove) 99 current_file.name)
100 if self.add2archive:
101 tarout.add(current_file.name, filter=self._remove)
92 mat.secure_remove(current_file.name) 102 mat.secure_remove(current_file.name)
93 else: 103 else:
94 self.folder_list.insert(0, current_file.name) 104 self.folder_list.insert(0, current_file.name)
@@ -123,7 +133,8 @@ class TarStripper(GenericArchiveStripper):
123 tarin.extract(current_file) 133 tarin.extract(current_file)
124 if current_file.type is '0': #is current_file a regular file ? 134 if current_file.type is '0': #is current_file a regular file ?
125 #no backup file 135 #no backup file
126 class_file = mat.create_class_file(current_file.name, False) 136 class_file = mat.create_class_file(current_file.name, False,
137 self.add2archive)
127 if not class_file.is_clean():#if the extracted file is not clean 138 if not class_file.is_clean():#if the extracted file is not clean
128 mat.secure_remove(current_file.name) #remove it 139 mat.secure_remove(current_file.name) #remove it
129 self.remove_folder() #remove all the remaining folders 140 self.remove_folder() #remove all the remaining folders
diff --git a/lib/mat.py b/lib/mat.py
index 1d7f53b..49a84f1 100644
--- a/lib/mat.py
+++ b/lib/mat.py
@@ -52,14 +52,16 @@ def is_secure(filename):
52 52
53 if not(os.path.isfile(filename)): #check if the file exist 53 if not(os.path.isfile(filename)): #check if the file exist
54 logging.error("Error: %s is not a valid file" % filename) 54 logging.error("Error: %s is not a valid file" % filename)
55 return 55 return False
56 56
57def create_class_file(name, backup): 57def create_class_file(name, backup, add2archive):
58 ''' 58 '''
59 return a $FILETYPEStripper() class, 59 return a $FILETYPEStripper() class,
60 corresponding to the filetype of the given file 60 corresponding to the filetype of the given file
61 ''' 61 '''
62 is_secure(name) 62 if is_secure(name):
63 print 'a'
64 return
63 65
64 filename = "" 66 filename = ""
65 realname = name 67 realname = name
@@ -83,4 +85,5 @@ def create_class_file(name, backup):
83 return 85 return
84 if editor.input.__class__ == hachoir_parser.misc.PDFDocument: 86 if editor.input.__class__ == hachoir_parser.misc.PDFDocument:
85 return stripper_class(filename, realname, backup) 87 return stripper_class(filename, realname, backup)
86 return stripper_class(realname, filename, parser, editor, backup) 88 return stripper_class(realname, filename, parser, editor, backup,
89 add2archive)
diff --git a/lib/parser.py b/lib/parser.py
index 2b7b52e..6e6ee34 100644
--- a/lib/parser.py
+++ b/lib/parser.py
@@ -5,16 +5,18 @@
5import hachoir_core.error 5import hachoir_core.error
6import hachoir_parser 6import hachoir_parser
7import hachoir_editor 7import hachoir_editor
8
8import sys 9import sys
9import os 10import os
10import subprocess 11import subprocess
11import mimetypes 12import mimetypes
13
12import mat 14import mat
13 15
14POSTFIX = ".cleaned" 16POSTFIX = ".cleaned"
15 17
16class Generic_parser(object): 18class Generic_parser(object):
17 def __init__(self, realname, filename, parser, editor, backup): 19 def __init__(self, realname, filename, parser, editor, backup, add2archive):
18 self.filename = filename 20 self.filename = filename
19 self.realname = realname 21 self.realname = realname
20 self.shortname = os.path.basename(filename) 22 self.shortname = os.path.basename(filename)
diff --git a/test/clitest.py b/test/clitest.py
index 91b71ca..f603f97 100644
--- a/test/clitest.py
+++ b/test/clitest.py
@@ -17,14 +17,14 @@ class Test_Remove_cli(test.MATTest):
17 '''make sure that the cli remove all compromizing meta''' 17 '''make sure that the cli remove all compromizing meta'''
18 for clean, dirty in self.file_list: 18 for clean, dirty in self.file_list:
19 subprocess.call(['../cli.py', dirty]) 19 subprocess.call(['../cli.py', dirty])
20 current_file = mat.create_class_file(dirty, False) 20 current_file = mat.create_class_file(dirty, False, True)
21 self.assertTrue(current_file.is_clean()) 21 self.assertTrue(current_file.is_clean())
22 22
23 def test_remove_empty(self): 23 def test_remove_empty(self):
24 '''Test removal with clean files''' 24 '''Test removal with clean files'''
25 for clean, dirty in self.file_list: 25 for clean, dirty in self.file_list:
26 subprocess.call(['../cli.py', clean]) 26 subprocess.call(['../cli.py', clean])
27 current_file = mat.create_class_file(clean, False) 27 current_file = mat.create_class_file(clean, False, True)
28 self.assertTrue(current_file.is_clean()) 28 self.assertTrue(current_file.is_clean())
29 29
30 30
diff --git a/test/libtest.py b/test/libtest.py
index b35b4f3..eea8117 100644
--- a/test/libtest.py
+++ b/test/libtest.py
@@ -14,14 +14,14 @@ class Test_Remove_lib(test.MATTest):
14 def test_remove(self): 14 def test_remove(self):
15 '''make sure that the lib remove all compromizing meta''' 15 '''make sure that the lib remove all compromizing meta'''
16 for clean, dirty in self.file_list: 16 for clean, dirty in self.file_list:
17 current_file = mat.create_class_file(dirty, False) 17 current_file = mat.create_class_file(dirty, False, True)
18 current_file.remove_all() 18 current_file.remove_all()
19 self.assertTrue(current_file.is_clean()) 19 self.assertTrue(current_file.is_clean())
20 20
21 def test_remove_empty(self): 21 def test_remove_empty(self):
22 '''Test removal with clean files''' 22 '''Test removal with clean files'''
23 for clean, dirty in self.file_list: 23 for clean, dirty in self.file_list:
24 current_file = mat.create_class_file(clean, False) 24 current_file = mat.create_class_file(clean, False, True)
25 current_file.remove_all() 25 current_file.remove_all()
26 self.assertTrue(current_file.is_clean()) 26 self.assertTrue(current_file.is_clean())
27 27
@@ -30,14 +30,14 @@ class Test_List_lib(test.MATTest):
30 def test_list(self): 30 def test_list(self):
31 '''check if get_meta returns all the expected meta''' 31 '''check if get_meta returns all the expected meta'''
32 for clean, dirty in self.file_list: 32 for clean, dirty in self.file_list:
33 current_file = mat.create_class_file(dirty, False) 33 current_file = mat.create_class_file(dirty, False, True)
34 #FIXME assertisNotNone() : python 2.7 34 #FIXME assertisNotNone() : python 2.7
35 self.assertTrue(current_file.get_meta()) 35 self.assertTrue(current_file.get_meta())
36 36
37 def testlist_list_empty(self): 37 def testlist_list_empty(self):
38 '''check that a listing of a clean file return an empty dict''' 38 '''check that a listing of a clean file return an empty dict'''
39 for clean, dirty in self.file_list: 39 for clean, dirty in self.file_list:
40 current_file = mat.create_class_file(clean, False) 40 current_file = mat.create_class_file(clean, False, True)
41 self.assertEqual(current_file.get_meta(), dict()) 41 self.assertEqual(current_file.get_meta(), dict())
42 42
43 43
@@ -45,13 +45,13 @@ class Test_isClean_lib(test.MATTest):
45 def test_dirty(self): 45 def test_dirty(self):
46 '''test is_clean on clean files''' 46 '''test is_clean on clean files'''
47 for clean, dirty in self.file_list: 47 for clean, dirty in self.file_list:
48 current_file = mat.create_class_file(dirty, False) 48 current_file = mat.create_class_file(dirty, False, True)
49 self.assertFalse(current_file.is_clean()) 49 self.assertFalse(current_file.is_clean())
50 50
51 def test_clean(self): 51 def test_clean(self):
52 '''test is_clean on dirty files''' 52 '''test is_clean on dirty files'''
53 for clean, dirty in self.file_list: 53 for clean, dirty in self.file_list:
54 current_file = mat.create_class_file(clean, False) 54 current_file = mat.create_class_file(clean, False, True)
55 self.assertTrue(current_file.is_clean()) 55 self.assertTrue(current_file.is_clean())
56 56
57 57