From de5917e5f01374bb1a647f49ae85283241a2bea9 Mon Sep 17 00:00:00 2001 From: jvoisin Date: Sat, 18 Jun 2011 04:42:52 +0200 Subject: Creation of the arborescence --- __init__.py | 0 clean.jpg | Bin 83261 -> 0 bytes cli.py | 2 +- clitest.py | 72 --------------------- dirty.jpg | Bin 83261 -> 0 bytes hachoir_editor/__init__.pyc | Bin 603 -> 0 bytes hachoir_editor/field.pyc | Bin 3690 -> 0 bytes hachoir_editor/fieldset.pyc | Bin 14654 -> 0 bytes hachoir_editor/typed_field.pyc | Bin 11745 -> 0 bytes lib/__init__.py | 0 lib/mat.py | 142 +++++++++++++++++++++++++++++++++++++++++ libtest.py | 62 ------------------ mat.py | 141 ---------------------------------------- test.py | 29 --------- test/__init__.py | 0 test/clean.jpg | Bin 0 -> 83261 bytes test/clitest.py | 73 +++++++++++++++++++++ test/dirty.jpg | Bin 0 -> 83261 bytes test/libtest.py | 64 +++++++++++++++++++ test/test.py | 31 +++++++++ 20 files changed, 311 insertions(+), 305 deletions(-) create mode 100644 __init__.py delete mode 100644 clean.jpg delete mode 100755 clitest.py delete mode 100644 dirty.jpg delete mode 100644 hachoir_editor/__init__.pyc delete mode 100644 hachoir_editor/field.pyc delete mode 100644 hachoir_editor/fieldset.pyc delete mode 100644 hachoir_editor/typed_field.pyc create mode 100644 lib/__init__.py create mode 100755 lib/mat.py delete mode 100755 libtest.py delete mode 100755 mat.py delete mode 100755 test.py create mode 100644 test/__init__.py create mode 100644 test/clean.jpg create mode 100755 test/clitest.py create mode 100644 test/dirty.jpg create mode 100755 test/libtest.py create mode 100755 test/test.py diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/clean.jpg b/clean.jpg deleted file mode 100644 index 09c9161..0000000 Binary files a/clean.jpg and /dev/null differ diff --git a/cli.py b/cli.py index bd87409..0ba7531 100755 --- a/cli.py +++ b/cli.py @@ -4,7 +4,7 @@ ''' import sys -import mat +import lib.mat import optparse __version__ = '0.1' diff --git a/clitest.py b/clitest.py deleted file mode 100755 index edd0ddb..0000000 --- a/clitest.py +++ /dev/null @@ -1,72 +0,0 @@ -#!/usr/bin/python -''' - Unit test for the CLI interface -''' - -import unittest -import subprocess - -import cli -import mat -import test - -class Test_Remove_cli(test.MATTest): - def test_remove(self): - '''make sure that the cli remove all compromizing meta''' - for clean, dirty in self.file_list: - subprocess.call(['./cli.py', dirty]) - current_file = mat.create_class_file(dirty) - self.assertTrue(current_file.is_clean()) - - def test_remove_empty(self): - '''Test removal with clean files''' - for clean, dirty in self.file_list: - subprocess.call(['./cli.py', clean]) - current_file = mat.create_class_file(clean) - self.assertTrue(current_file.is_clean()) - - -class Test_List_cli(test.MATTest): - def test_list_clean(self): - '''check if get_meta returns meta''' - for clean, dirty in self.file_list: - #fixme : a (clean|dirty).(jpg|pdf|...).out ? - proc = subprocess.Popen(['./cli.py', '-d', clean], - stdout=subprocess.PIPE) - stdout, stderr = proc.communicate() - self.assertEqual(stdout, "[+] File %s" % clean) - - def test_list_dirty(self): - '''check if get_meta returns all the expected meta''' - for clean, dirty in self.file_list: - proc = subprocess.Popen(['./cli.py', '-d', dirty], - stdout=subprocess.PIPE) - stdout, stderr = proc.communicate() - self.assertNotEqual(stdout, "[+] File %s" % dirty) - - -class Test_isClean_cli(test.MATTest): - #FIXME : use an external file with string as const ? - def test_clean(self): - '''test is_clean on clean files''' - for clean, dirty in self.file_list: - proc = subprocess.Popen(['./cli.py', '-c', clean], - stdout=subprocess.PIPE) - stdout, stderr = proc.communicate() - self.assertEqual(stdout.strip('\n'), '[+] %s is clean' % clean) - - def test_dirty(self): - '''test is_clean on dirty files''' - for clean, dirty in self.file_list: - proc = subprocess.Popen(['./cli.py', '-c', dirty], - stdout=subprocess.PIPE) - stdout, stderr = proc.communicate() - self.assertEqual(stdout.strip('\n'), '[+] %s is not clean' % dirty) - - -if __name__ == '__main__': - suite = unittest.TestSuite() - suite.addTest(unittest.makeSuite(Test_Remove_cli)) - suite.addTest(unittest.makeSuite(Test_List_cli)) - suite.addTest(unittest.makeSuite(Test_isClean_cli)) - unittest.TextTestRunner(verbosity=2).run(suite) diff --git a/dirty.jpg b/dirty.jpg deleted file mode 100644 index 09c9161..0000000 Binary files a/dirty.jpg and /dev/null differ diff --git a/hachoir_editor/__init__.pyc b/hachoir_editor/__init__.pyc deleted file mode 100644 index 23f4a8e..0000000 Binary files a/hachoir_editor/__init__.pyc and /dev/null differ diff --git a/hachoir_editor/field.pyc b/hachoir_editor/field.pyc deleted file mode 100644 index ff7b91d..0000000 Binary files a/hachoir_editor/field.pyc and /dev/null differ diff --git a/hachoir_editor/fieldset.pyc b/hachoir_editor/fieldset.pyc deleted file mode 100644 index f4e37fa..0000000 Binary files a/hachoir_editor/fieldset.pyc and /dev/null differ diff --git a/hachoir_editor/typed_field.pyc b/hachoir_editor/typed_field.pyc deleted file mode 100644 index 3d442ed..0000000 Binary files a/hachoir_editor/typed_field.pyc and /dev/null differ diff --git a/lib/__init__.py b/lib/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/lib/mat.py b/lib/mat.py new file mode 100755 index 0000000..48b83fb --- /dev/null +++ b/lib/mat.py @@ -0,0 +1,142 @@ +#!/usr/bin/python + +''' + Metadata anonymisation toolkit library +''' +import sys +import os + +import hachoir_core.error +import hachoir_core.field +import hachoir_core.cmd_line +import hachoir_parser +import hachoir_metadata +import hachoir_parser.image + +sys.path.append('..') +import hachoir_editor + +__version__ = "0.1" +__author__ = "jvoisin" + +POSTFIX = ".cleaned" + +class file(): + def __init__(self, realname, filename, parser, editor): + self.meta = {} + self.filename = filename + self.realname = realname + self.parser = parser + self.editor = editor + self.meta = self.__fill_meta() + + def __fill_meta(self): + metadata = {} + try: + meta = hachoir_metadata.extractMetadata(self.parser) + except hachoir_core.error.HachoirError, err: + print("Metadata extraction error: %s" % err) + + if not meta: + print("Unable to extract metadata from the file %s" % self.filename) + sys.exit(1) + + for title in meta: + #fixme i'm so dirty + if title.values != []: #if the field is not empty + value = "" + for item in title.values: + value = item.text + metadata[title.key] = value + return metadata + + def is_clean(self): + ''' + Check if the file is clean from harmful metadatas + ''' + for field in self.editor: + if self._should_remove(field): + return False + return True + + def remove_all(self): + ''' + Remove all the files that are compromizing + ''' + for field in self.editor: + if self._should_remove(field): + self._remove(field) + hachoir_core.field.writeIntoFile(self.editor, self.filename + POSTFIX) + + def _remove(self, field): + ''' + Remove the given field + ''' + del self.editor[field.name] + + + def get_meta(self): + ''' + return a dict with all the meta of the file + ''' + #am I useless ? + return self.meta + + def _should_remove(self, key): + ''' + return True if the field is compromizing + abstract method + ''' + raise NotImplementedError() + +class JpegStripper(file): + def _should_remove(self, field): + if field.name.startswith('comment'): + return True + elif field.name in ("photoshop", "exif", "adobe"): + return True + else: + return False + +class PngStripper(file): + def _should_remove(self, field): + if field.name in ('comment'): + return True + else: + return False + +strippers = { + hachoir_parser.image.JpegFile: JpegStripper, + hachoir_parser.image.PngFile: PngStripper, +} + +def create_class_file(name): + ''' + return a $FILETYPEStripper() class, + corresponding to the filetype of the given file + ''' + if not(os.path.isfile(name)): #check if the file exist + print("Error: %s is not a valid file" % name) + sys.exit(1) + + filename = "" + realname = name + filename = hachoir_core.cmd_line.unicodeFilename(name) + parser = hachoir_parser.createParser(filename) + if not parser: + print("Unable to parse the file %s : sorry" % filename) + sys.exit(1) + + editor = hachoir_editor.createEditor(parser) + try: + '''this part is a little tricky : + stripper_class will receice the name of the class $FILETYPEStripper, + (which herits from the "file" class), based on the editor + of given file (name) + ''' + stripper_class = strippers[editor.input.__class__] + except KeyError: + #Place for another lib than hachoir + print("Don't have stripper for file type: %s" % editor.description) + sys.exit(1) + return stripper_class(realname, filename, parser, editor) diff --git a/libtest.py b/libtest.py deleted file mode 100755 index 58cc618..0000000 --- a/libtest.py +++ /dev/null @@ -1,62 +0,0 @@ -#!/usr/bin/python - -''' - Unit test for the library -''' - -import mat -import unittest -import test - -class Test_Remove_lib(test.MATTest): - def test_remove(self): - '''make sure that the lib remove all compromizing meta''' - for clean, dirty in self.file_list: - current_file = mat.create_class_file(dirty) - current_file.remove_all() - self.assertTrue(current_file.is_clean()) - - def test_remove_empty(self): - '''Test removal with clean files''' - for clean, dirty in self.file_list: - current_file = mat.create_class_file(clean) - current_file.remove_all() - self.assertTrue(current_file.is_clean()) - - -class Test_List_lib(test.MATTest): - def test_list(self): - '''check if get_meta returns all the expected meta''' - for clean, dirty in self.file_list: - current_file = mat.create_class_file(dirty) - meta_list = dict({"fixme":"please"},) - self.assertEqual(current_file.get_meta(), meta_list) - - def testlist_list_empty(self): - '''check that a listing of a clean file return an empty dict''' - for clean, dirty in self.file_list: - current_file = mat.create_class_file(clean) - self.assertEqual(current_file.get_meta(), dict()) #dirty, isn't it ? - - -class Test_isClean_lib(test.MATTest): - def test_dirty(self): - '''test is_clean on clean files''' - for clean, dirty in self.file_list: - current_file = mat.create_class_file(dirty) - self.assertTrue(current_file.is_clean()) - - def test_clean(self): - '''test is_clean on dirty files''' - for clean, dirty in self.file_list: - current_file = mat.create_class_file(clean) - self.assertFalse(current_file.is_clean()) - - -if __name__ == '__main__': - suite = unittest.TestSuite() - suite.addTest(unittest.makeSuite(Test_Remove_lib)) - suite.addTest(unittest.makeSuite(Test_List_lib)) - suite.addTest(unittest.makeSuite(Test_isClean_lib)) - unittest.TextTestRunner(verbosity=2).run(suite) - diff --git a/mat.py b/mat.py deleted file mode 100755 index 200fc04..0000000 --- a/mat.py +++ /dev/null @@ -1,141 +0,0 @@ -#!/usr/bin/python - -''' - Metadata anonymisation toolkit library -''' - -import hachoir_core.error -import hachoir_core.field -import hachoir_core.cmd_line -import hachoir_parser -import hachoir_metadata -import hachoir_editor - -import sys -import os -import hachoir_parser.image - -__version__ = "0.1" -__author__ = "jvoisin" - -POSTFIX = ".cleaned" - -class file(): - def __init__(self, realname, filename, parser, editor): - self.meta = {} - self.filename = filename - self.realname = realname - self.parser = parser - self.editor = editor - self.meta = self.__fill_meta() - - def __fill_meta(self): - metadata = {} - try: - meta = hachoir_metadata.extractMetadata(self.parser) - except hachoir_core.error.HachoirError, err: - print("Metadata extraction error: %s" % err) - - if not meta: - print("Unable to extract metadata from the file %s" % self.filename) - sys.exit(1) - - for title in meta: - #fixme i'm so dirty - if title.values != []: #if the field is not empty - value = "" - for item in title.values: - value = item.text - metadata[title.key] = value - return metadata - - def is_clean(self): - ''' - Check if the file is clean from harmful metadatas - ''' - for field in self.editor: - if self._should_remove(field): - return False - return True - - def remove_all(self): - ''' - Remove all the files that are compromizing - ''' - for field in self.editor: - if self._should_remove(field): - self._remove(field) - hachoir_core.field.writeIntoFile(self.editor, self.filename + POSTFIX) - - def _remove(self, field): - ''' - Remove the given field - ''' - del self.editor[field.name] - - - def get_meta(self): - ''' - return a dict with all the meta of the file - ''' - #am I useless ? - return self.meta - - def _should_remove(self, key): - ''' - return True if the field is compromizing - abstract method - ''' - raise NotImplementedError() - -class JpegStripper(file): - def _should_remove(self, field): - if field.name.startswith('comment'): - return True - elif field.name in ("photoshop", "exif", "adobe"): - return True - else: - return False - -class PngStripper(file): - def _should_remove(self, field): - if field.name in ('comment'): - return True - else: - return False - -strippers = { - hachoir_parser.image.JpegFile: JpegStripper, - hachoir_parser.image.PngFile: PngStripper, -} - -def create_class_file(name): - ''' - return a $FILETYPEStripper() class, - corresponding to the filetype of the given file - ''' - if not(os.path.isfile(name)): #check if the file exist - print("Error: %s is not a valid file" % name) - sys.exit(1) - - filename = "" - realname = name - filename = hachoir_core.cmd_line.unicodeFilename(name) - parser = hachoir_parser.createParser(filename) - if not parser: - print("Unable to parse the file %s : sorry" % filename) - sys.exit(1) - - editor = hachoir_editor.createEditor(parser) - try: - '''this part is a little tricky : - stripper_class will receice the name of the class $FILETYPEStripper, - (which herits from the "file" class), based on the editor - of given file (name) - ''' - stripper_class = strippers[editor.input.__class__] - except KeyError: - #Place for another lib than hachoir - print("Don't have stripper for file type: %s" % editor.description) - sys.exit(1) - return stripper_class(realname, filename, parser, editor) diff --git a/test.py b/test.py deleted file mode 100755 index 3138be7..0000000 --- a/test.py +++ /dev/null @@ -1,29 +0,0 @@ -''' - Class for the testing suite : - - get the list of all test files - - create a copy of them on start - - remove the copy on end -''' - -import shutil -import glob -import tempfile -import unittest -import mat - -FILE_LIST = zip(glob.glob('clean*'), glob.glob('dirty*')) - -class MATTest(unittest.TestCase): - def setUp(self): - '''create working copy of the clean and the dirty file in the TMP dir''' - self.file_list = [] - self.tmpdir = tempfile.mkdtemp() - - for clean, dirty in FILE_LIST: - shutil.copy2(clean, self.tmpdir + clean) - shutil.copy2(dirty, self.tmpdir + dirty) - self.file_list.append((self.tmpdir + clean, self.tmpdir + dirty)) - - def tearDown(self): - '''Remove the tmp folder''' - shutil.rmtree(self.tmpdir) diff --git a/test/__init__.py b/test/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/clean.jpg b/test/clean.jpg new file mode 100644 index 0000000..09c9161 Binary files /dev/null and b/test/clean.jpg differ diff --git a/test/clitest.py b/test/clitest.py new file mode 100755 index 0000000..da0563f --- /dev/null +++ b/test/clitest.py @@ -0,0 +1,73 @@ +#!/usr/bin/python +''' + Unit test for the CLI interface +''' + +import unittest +import subprocess +import sys +sys.path.append('..') +import cli +from lib import mat +import test + +class Test_Remove_cli(test.MATTest): + def test_remove(self): + '''make sure that the cli remove all compromizing meta''' + for clean, dirty in self.file_list: + subprocess.call(['../cli.py', dirty]) + current_file = mat.create_class_file(dirty) + self.assertTrue(current_file.is_clean()) + + def test_remove_empty(self): + '''Test removal with clean files''' + for clean, dirty in self.file_list: + subprocess.call(['../cli.py', clean]) + current_file = mat.create_class_file(clean) + self.assertTrue(current_file.is_clean()) + + +class Test_List_cli(test.MATTest): + def test_list_clean(self): + '''check if get_meta returns meta''' + for clean, dirty in self.file_list: + #fixme : a (clean|dirty).(jpg|pdf|...).out ? + proc = subprocess.Popen(['../cli.py', '-d', clean], + stdout=subprocess.PIPE) + stdout, stderr = proc.communicate() + self.assertEqual(stdout, "[+] File %s" % clean) + + def test_list_dirty(self): + '''check if get_meta returns all the expected meta''' + for clean, dirty in self.file_list: + proc = subprocess.Popen(['../cli.py', '-d', dirty], + stdout=subprocess.PIPE) + stdout, stderr = proc.communicate() + self.assertNotEqual(stdout, "[+] File %s" % dirty) + + +class Test_isClean_cli(test.MATTest): + #FIXME : use an external file with string as const ? + def test_clean(self): + '''test is_clean on clean files''' + for clean, dirty in self.file_list: + proc = subprocess.Popen(['../cli.py', '-c', clean], + stdout=subprocess.PIPE) + stdout, stderr = proc.communicate() + self.assertEqual(stdout.strip('\n'), '[+] %s is clean' % clean) + + def test_dirty(self): + '''test is_clean on dirty files''' + for clean, dirty in self.file_list: + proc = subprocess.Popen(['../cli.py', '-c', dirty], + stdout=subprocess.PIPE) + stdout, stderr = proc.communicate() + self.assertEqual(stdout.strip('\n'), '[+] %s is not clean' % dirty) + + +if __name__ == '__main__': + suite = unittest.TestSuite() + suite.addTest(unittest.makeSuite(Test_Remove_cli)) + suite.addTest(unittest.makeSuite(Test_List_cli)) + suite.addTest(unittest.makeSuite(Test_isClean_cli)) + unittest.TextTestRunner(verbosity=2).run(suite) diff --git a/test/dirty.jpg b/test/dirty.jpg new file mode 100644 index 0000000..09c9161 Binary files /dev/null and b/test/dirty.jpg differ diff --git a/test/libtest.py b/test/libtest.py new file mode 100755 index 0000000..2bd1fa7 --- /dev/null +++ b/test/libtest.py @@ -0,0 +1,64 @@ +#!/usr/bin/python + +''' + Unit test for the library +''' + +import unittest +import test +import sys +sys.path.append('..') +from lib import mat + +class Test_Remove_lib(test.MATTest): + def test_remove(self): + '''make sure that the lib remove all compromizing meta''' + for clean, dirty in self.file_list: + current_file = mat.create_class_file(dirty) + current_file.remove_all() + self.assertTrue(current_file.is_clean()) + + def test_remove_empty(self): + '''Test removal with clean files''' + for clean, dirty in self.file_list: + current_file = mat.create_class_file(clean) + current_file.remove_all() + self.assertTrue(current_file.is_clean()) + + +class Test_List_lib(test.MATTest): + def test_list(self): + '''check if get_meta returns all the expected meta''' + for clean, dirty in self.file_list: + current_file = mat.create_class_file(dirty) + meta_list = dict({"fixme":"please"},) + self.assertEqual(current_file.get_meta(), meta_list) + + def testlist_list_empty(self): + '''check that a listing of a clean file return an empty dict''' + for clean, dirty in self.file_list: + current_file = mat.create_class_file(clean) + self.assertEqual(current_file.get_meta(), dict()) #dirty, isn't it ? + + +class Test_isClean_lib(test.MATTest): + def test_dirty(self): + '''test is_clean on clean files''' + for clean, dirty in self.file_list: + current_file = mat.create_class_file(dirty) + self.assertTrue(current_file.is_clean()) + + def test_clean(self): + '''test is_clean on dirty files''' + for clean, dirty in self.file_list: + current_file = mat.create_class_file(clean) + self.assertFalse(current_file.is_clean()) + + +if __name__ == '__main__': + suite = unittest.TestSuite() + suite.addTest(unittest.makeSuite(Test_Remove_lib)) + suite.addTest(unittest.makeSuite(Test_List_lib)) + suite.addTest(unittest.makeSuite(Test_isClean_lib)) + unittest.TextTestRunner(verbosity=2).run(suite) + diff --git a/test/test.py b/test/test.py new file mode 100755 index 0000000..f095157 --- /dev/null +++ b/test/test.py @@ -0,0 +1,31 @@ +''' + Class for the testing suite : + - get the list of all test files + - create a copy of them on start + - remove the copy on end +''' + +import shutil +import glob +import sys +import tempfile +import unittest +sys.path.append('..') +from lib import mat + +FILE_LIST = zip(glob.glob('clean*'), glob.glob('dirty*')) + +class MATTest(unittest.TestCase): + def setUp(self): + '''create working copy of the clean and the dirty file in the TMP dir''' + self.file_list = [] + self.tmpdir = tempfile.mkdtemp() + + for clean, dirty in FILE_LIST: + shutil.copy2(clean, self.tmpdir + clean) + shutil.copy2(dirty, self.tmpdir + dirty) + self.file_list.append((self.tmpdir + clean, self.tmpdir + dirty)) + + def tearDown(self): + '''Remove the tmp folder''' + shutil.rmtree(self.tmpdir) -- cgit v1.3