From bb83090afcd12893c301ef14869e84beda43ccb9 Mon Sep 17 00:00:00 2001 From: jvoisin Date: Thu, 21 Jul 2011 21:15:09 +0200 Subject: Add the possibility to add (or not) unsupported fileformat to outputed archives fileformat (invasive patch, sorry about that). --- cli.py | 5 ++++- gui.py | 20 ++++++++++++++------ lib/archive.py | 29 ++++++++++++++++++++--------- lib/mat.py | 11 +++++++---- lib/parser.py | 4 +++- test/clitest.py | 4 ++-- test/libtest.py | 12 ++++++------ 7 files changed, 56 insertions(+), 29 deletions(-) diff --git a/cli.py b/cli.py index a19b8ce..b9c8a5c 100755 --- a/cli.py +++ b/cli.py @@ -12,6 +12,8 @@ __version__ = '0.1' def parse(): parser = optparse.OptionParser(usage='%prog [options] filename') + parser.add_option('--add2archive', '-a', action='store_true', default=False, + help='Add to outputed archive non-supported filetypes') parser.add_option('--backup', '-b', action='store_true', default=False, help='Keep a backup copy') parser.add_option('--check', '-c', action='store_true', default=False, @@ -91,7 +93,8 @@ def main(): func = clean_meta for filename in filenames: - class_file = mat.create_class_file(filename, args.backup) + class_file = mat.create_class_file(filename, args.backup, + args.add2archive) if class_file is not None: func(class_file, filename) diff --git a/gui.py b/gui.py index 9294fcc..978bd4b 100644 --- a/gui.py +++ b/gui.py @@ -22,10 +22,10 @@ class cfile(GObject.GObject): This class exist just to be "around" my parser.Generic_parser class, since Gtk.ListStore does not accept it. ''' - def __init__(self, path, backup): + def __init__(self, path, backup, add2archive): GObject.GObject.__init__(self) try: - self.file = mat.create_class_file(path, backup) + self.file = mat.create_class_file(path, backup, add2archive) except: self.file = None @@ -37,6 +37,7 @@ class ListStoreApp: #preferences self.backup = True self.force = False + self.add2archive = True self.window = Gtk.Window() self.window.set_title('Metadata Anonymisation Toolkit %s' % __version__) @@ -223,7 +224,7 @@ class ListStoreApp: ''' Append selected files by add_file to the self.liststore ''' - cf = cfile(item, self.backup) + cf = cfile(item, self.backup, self.add2archive) if cf.file is not None: self.liststore.append([cf, cf.file.filename, cf.file.mime,'unknow']) @@ -253,7 +254,7 @@ class ListStoreApp: hbox.pack_start(icon, False, False, 0) - table = Gtk.Table(2, 2, False)#nb rows, nb lines + table = Gtk.Table(3, 2, False)#nb rows, nb lines table.set_row_spacings(4) table.set_col_spacings(4) hbox.pack_start(table, True, True, 0) @@ -268,8 +269,15 @@ class ListStoreApp: backup.set_tooltip_text('Keep a backup copy.') backup.set_active(self.backup) + add2archive = Gtk.CheckButton('Add unsupported file to archives', False) + add2archive.connect('toggled', self.invert, 'add2archive') + add2archive.set_tooltip_text('Add non-supported (and so non-anonymised)\ + file to outputed archive.') + add2archive.set_active(self.add2archive) + table.attach_defaults(force, 0, 1, 0, 1) table.attach_defaults(backup, 0, 1, 1, 2) + table.attach_defaults(add2archive, 0, 1, 2, 3) hbox.show_all() response = dialog.run() @@ -313,7 +321,7 @@ class ListStoreApp: if self.force: self.liststore[i][0].file.remove_all() else: - if not self.liststore[i][0].is_clean(): + if not self.liststore[i][0].file.is_clean(): self.liststore[i][0].file.remove_all() self.liststore[i][3] = 'clean' @@ -326,7 +334,7 @@ class ListStoreApp: if self.force: self.liststore[i][0].file.remove_all_ugly() else: - if not self.liststore[i][0].is_clean(): + if not self.liststore[i][0].file.is_clean(): self.liststore[i][0].file.remove_all_ugly() self.liststore[i][3] = 'clean' diff --git a/lib/archive.py b/lib/archive.py index aa2a47f..c31fac6 100644 --- a/lib/archive.py +++ b/lib/archive.py @@ -13,11 +13,12 @@ class GenericArchiveStripper(parser.Generic_parser): ''' Represent a generic archive ''' - def __init__(self, realname, filename, parser, editor, backup): + def __init__(self, realname, filename, parser, editor, backup, add2archive): super(GenericArchiveStripper, self).__init__(realname, - filename, parser, editor, backup) + filename, parser, editor, backup, add2archive) self.compression = '' self.folder_list = [] + self.add2archive = add2archive def remove_folder(self): [shutil.rmtree(folder) for folder in self.folder_list] @@ -49,13 +50,18 @@ class ZipStripper(GenericArchiveStripper): zipin.extract(item) if os.path.isfile(item.filename): try: - cfile = mat.create_class_file(item.filename, False) + cfile = mat.create_class_file(item.filename, False, + self.add2archive) cfile.remove_all() logging.debug('Processing %s from %s' % (item.filename, self.filename)) + zipout.write(item.filename) except: - print('%s\' filefomart is not supported'%item.filename) - zipout.write(item.filename) + logging.info('%s\' filefomart is not supported' % + item.filename) + if self.add2archive: + zipout.write(item.filename) + mat.secure_remove(item.filename) else: self.folder_list.insert(0, item.filename) logging.info('%s treated' % self.filename) @@ -84,11 +90,15 @@ class TarStripper(GenericArchiveStripper): if current_file.type is '0': #is current_file a regular file ? #no backup file try: - cfile = mat.create_class_file(current_file.name, False) + cfile = mat.create_class_file(current_file.name, False, + self.add2archive) cfile.remove_all() + tarout.add(current_file.name, filter=self._remove) except: - print('%s\' format is not supported'%current_file.name) - tarout.add(current_file.name, filter=self._remove) + logging.info('%s\' format is not supported' % + current_file.name) + if self.add2archive: + tarout.add(current_file.name, filter=self._remove) mat.secure_remove(current_file.name) else: self.folder_list.insert(0, current_file.name) @@ -123,7 +133,8 @@ class TarStripper(GenericArchiveStripper): tarin.extract(current_file) if current_file.type is '0': #is current_file a regular file ? #no backup file - class_file = mat.create_class_file(current_file.name, False) + class_file = mat.create_class_file(current_file.name, False, + self.add2archive) if not class_file.is_clean():#if the extracted file is not clean mat.secure_remove(current_file.name) #remove it self.remove_folder() #remove all the remaining folders diff --git a/lib/mat.py b/lib/mat.py index 1d7f53b..49a84f1 100644 --- a/lib/mat.py +++ b/lib/mat.py @@ -52,14 +52,16 @@ def is_secure(filename): if not(os.path.isfile(filename)): #check if the file exist logging.error("Error: %s is not a valid file" % filename) - return + return False -def create_class_file(name, backup): +def create_class_file(name, backup, add2archive): ''' return a $FILETYPEStripper() class, corresponding to the filetype of the given file ''' - is_secure(name) + if is_secure(name): + print 'a' + return filename = "" realname = name @@ -83,4 +85,5 @@ def create_class_file(name, backup): return if editor.input.__class__ == hachoir_parser.misc.PDFDocument: return stripper_class(filename, realname, backup) - return stripper_class(realname, filename, parser, editor, backup) + return stripper_class(realname, filename, parser, editor, backup, + add2archive) diff --git a/lib/parser.py b/lib/parser.py index 2b7b52e..6e6ee34 100644 --- a/lib/parser.py +++ b/lib/parser.py @@ -5,16 +5,18 @@ import hachoir_core.error import hachoir_parser import hachoir_editor + import sys import os import subprocess import mimetypes + import mat POSTFIX = ".cleaned" class Generic_parser(object): - def __init__(self, realname, filename, parser, editor, backup): + def __init__(self, realname, filename, parser, editor, backup, add2archive): self.filename = filename self.realname = realname self.shortname = os.path.basename(filename) diff --git a/test/clitest.py b/test/clitest.py index 91b71ca..f603f97 100644 --- a/test/clitest.py +++ b/test/clitest.py @@ -17,14 +17,14 @@ class Test_Remove_cli(test.MATTest): '''make sure that the cli remove all compromizing meta''' for clean, dirty in self.file_list: subprocess.call(['../cli.py', dirty]) - current_file = mat.create_class_file(dirty, False) + current_file = mat.create_class_file(dirty, False, True) self.assertTrue(current_file.is_clean()) def test_remove_empty(self): '''Test removal with clean files''' for clean, dirty in self.file_list: subprocess.call(['../cli.py', clean]) - current_file = mat.create_class_file(clean, False) + current_file = mat.create_class_file(clean, False, True) self.assertTrue(current_file.is_clean()) diff --git a/test/libtest.py b/test/libtest.py index b35b4f3..eea8117 100644 --- a/test/libtest.py +++ b/test/libtest.py @@ -14,14 +14,14 @@ class Test_Remove_lib(test.MATTest): def test_remove(self): '''make sure that the lib remove all compromizing meta''' for clean, dirty in self.file_list: - current_file = mat.create_class_file(dirty, False) + current_file = mat.create_class_file(dirty, False, True) current_file.remove_all() self.assertTrue(current_file.is_clean()) def test_remove_empty(self): '''Test removal with clean files''' for clean, dirty in self.file_list: - current_file = mat.create_class_file(clean, False) + current_file = mat.create_class_file(clean, False, True) current_file.remove_all() self.assertTrue(current_file.is_clean()) @@ -30,14 +30,14 @@ class Test_List_lib(test.MATTest): def test_list(self): '''check if get_meta returns all the expected meta''' for clean, dirty in self.file_list: - current_file = mat.create_class_file(dirty, False) + current_file = mat.create_class_file(dirty, False, True) #FIXME assertisNotNone() : python 2.7 self.assertTrue(current_file.get_meta()) def testlist_list_empty(self): '''check that a listing of a clean file return an empty dict''' for clean, dirty in self.file_list: - current_file = mat.create_class_file(clean, False) + current_file = mat.create_class_file(clean, False, True) self.assertEqual(current_file.get_meta(), dict()) @@ -45,13 +45,13 @@ class Test_isClean_lib(test.MATTest): def test_dirty(self): '''test is_clean on clean files''' for clean, dirty in self.file_list: - current_file = mat.create_class_file(dirty, False) + current_file = mat.create_class_file(dirty, False, True) self.assertFalse(current_file.is_clean()) def test_clean(self): '''test is_clean on dirty files''' for clean, dirty in self.file_list: - current_file = mat.create_class_file(clean, False) + current_file = mat.create_class_file(clean, False, True) self.assertTrue(current_file.is_clean()) -- cgit v1.3