summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author jvoisin 2011-06-10 01:29:29 +0200
committer jvoisin 2011-06-10 01:29:29 +0200
commit c308cf7daaa4fa46377e2df0f2e9a397981e19b2 (patch)
tree f016ce17cd6747acc068a7d2fc5093d1bd96fa9e
parent f7082a21d6511c5069fbb9ff186ce22f3e22fed7 (diff)
The current version is (mostly) working
-rw-r--r--README0
-rw-r--r--clean.jpgbin0 -> 83261 bytes
-rw-r--r--cli.py77
-rw-r--r--clitest.py (renamed from lib/test.py)40
-rw-r--r--dirty.jpgbin0 -> 83261 bytes
-rw-r--r--hachoir_editor/__init__.py (renamed from lib/hachoir_editor/__init__.py)0
-rw-r--r--hachoir_editor/__init__.pycbin0 -> 603 bytes
-rw-r--r--hachoir_editor/field.py (renamed from lib/hachoir_editor/field.py)0
-rw-r--r--hachoir_editor/field.pycbin0 -> 3690 bytes
-rw-r--r--hachoir_editor/fieldset.py (renamed from lib/hachoir_editor/fieldset.py)0
-rw-r--r--hachoir_editor/fieldset.pycbin0 -> 14654 bytes
-rw-r--r--hachoir_editor/typed_field.py (renamed from lib/hachoir_editor/typed_field.py)0
-rw-r--r--hachoir_editor/typed_field.pycbin0 -> 11745 bytes
-rw-r--r--lib/check/images/jpg.py17
-rw-r--r--lib/mat.py104
-rw-r--r--lib/strippers.py3
-rw-r--r--libtest.py56
-rw-r--r--mat.py117
-rw-r--r--test.py29
19 files changed, 290 insertions, 153 deletions
diff --git a/README b/README
deleted file mode 100644
index e69de29..0000000
--- a/README
+++ /dev/null
diff --git a/clean.jpg b/clean.jpg
new file mode 100644
index 0000000..09c9161
--- /dev/null
+++ b/clean.jpg
Binary files differ
diff --git a/cli.py b/cli.py
index d249917..fedb40c 100644
--- a/cli.py
+++ b/cli.py
@@ -3,7 +3,82 @@
3 Metadata anonymisation toolkit 3 Metadata anonymisation toolkit
4""" 4"""
5 5
6import lib
7import sys 6import sys
7import mat
8import argparse
8 9
__version__ = "0.1"


def parsing(argv=None):
    '''
        Build the command line parser and parse the arguments.

        argv is the list of arguments to parse; when it is None (the
        default) argparse falls back to sys.argv[1:].

        Return the argparse namespace, with the attributes:
            just_list  -- True when --print-meta/-p was given
            just_check -- True when --check-clean/-c was given
            filelist   -- list of the file names to process (at least one)

        argparse exits the program (SystemExit) on invalid arguments.
    '''
    parser = argparse.ArgumentParser(
        description="Metadata Anonymisation Toolkit - CLI %s" % __version__)

    # The "version" keyword of ArgumentParser is deprecated since
    # Python 2.7 and gone in Python 3; an explicit "version" action
    # provides the same -v/--version flags on every version.
    parser.add_argument('-v', '--version', action='version',
                        version=__version__)

    # list and check clean are mutually exclusive
    group = parser.add_mutually_exclusive_group()

    # list meta
    group.add_argument('--print-meta', '-p', action="store_true",
                       default=False, dest='just_list',
                       help='List all the meta of a file, '
                            'without removing them')

    # check if the file is clean
    group.add_argument('--check-clean', '-c', action="store_true",
                       default=False, dest='just_check',
                       help='Check if a file is clean of harmfull metadatas')

    # list of files to process
    parser.add_argument('filelist', action="store", type=str, nargs="+",
                        metavar='file', help='File(s) to process')

    return parser.parse_args(argv)
38
def list_meta(class_file, filename):
    '''
        Print all the meta of "filename" on stdout.

        class_file must provide a get_meta() method returning a dict;
        each entry is printed on its own tab-indented line, after a
        header line naming the file.
    '''
    print("[+] File %s :" % filename)
    # items() instead of iteritems(): the latter only exists on Python 2
    for key, item in class_file.get_meta().items():
        print("\t%s : %s" % (key, item))
46
def is_clean(class_file, filename):
    '''
        Report on stdout whether "filename" is clean or not.

        The verdict comes from class_file.is_clean(); nothing is returned.
    '''
    if not class_file.is_clean():
        print("[+] %s is not clean" % filename)
        return
    print("[+] %s is clean" % filename)
55
def clean_meta(class_file, filename):
    '''
        Clean the file "filename".

        A progress line is always printed first. When class_file reports
        itself as already clean, only a notice is printed; otherwise
        class_file.remove_all() is called before the success message.
    '''
    print("[+] Cleaning %s" % filename)
    if class_file.is_clean():
        print("%s is already clean" % filename)
        return

    class_file.remove_all()
    print("%s cleaned !" % filename)
66
def main():
    '''
        Entry point of the command line interface.

        Parse the arguments, select the action matching the options
        (list the meta, check cleanliness, or clean, which is the
        default) and apply it to every file given on the command line.
    '''
    options = parsing()

    # "action" holds the function matching the options given as parameters
    if options.just_list:  # only print metadatas
        action = list_meta
    elif options.just_check:  # only check if the file is clean
        action = is_clean
    else:  # clean the file
        action = clean_meta

    for path in options.filelist:
        stripper = mat.create_class_file(path)
        action(stripper, path)
        print("\n")


if __name__ == '__main__':
    main()
diff --git a/lib/test.py b/clitest.py
index b1ff2a3..00955ae 100644
--- a/lib/test.py
+++ b/clitest.py
@@ -1,45 +1,29 @@
1import mat 1import cli
2import unittest 2import unittest
3import shutil 3import test
4import glob
5import tempfile
6 4
7FILE_LIST = zip(glob.glob('clean*'), glob.glob('dirty*')) 5import shlex
6import subprocess
8 7
9class MATTest(unittest.TestCase): 8class Test_Remove_cli(test.MATTest):
10 def setUp(self):
11 '''create working copy of the clean and the dirty file in the TMP dir'''
12 self.file_list = []
13 self.tmpdir = tempfile.mkdtemp()
14
15 for clean, dirty in FILE_LIST:
16 shutil.copy2(clean, self.tmpdir + clean)
17 shutil.copy2(dirty, self.tmpdir + dirty)
18 self.file_list.append((self.tmpdir + clean, self.tmpdir + dirty))
19
20 def tearDown(self):
21 '''Remove the tmp folder'''
22 shutil.rmtree(self.tmpdir)
23
24class Test_Remove(MATTest):
25 def test_remove(self): 9 def test_remove(self):
26 '''make sure that the lib remove all compromizing meta''' 10 '''make sure that the cli remove all compromizing meta'''
27 for clean, dirty in self.file_list: 11 for clean, dirty in self.file_list:
28 mat.file(dirty).remove_all() 12 subprocess.call("cli.py %s" dirty)
29 self.assertTrue(mat.file(dirty).is_clean()) 13 self.assertTrue(mat.file(dirty).is_clean())
30 14
31 def test_remove_empty(self): 15 def test_remove_empty(self):
32 '''Test removal with clean files''' 16 '''Test removal with clean files'''
33 for clean, dirty in self.file_list: 17 for clean, dirty in self.file_list:
34 mat.file(clean).remove_all() 18 subprocess.call("cli.py %s" clean)
35 self.assertTrue(mat.file(clean).is_clean()) 19 self.assertTrue(mat.file(dirty).is_clean())
36 20
37 21
38class Test_List(MATTest): 22class Test_List_cli(test.MATTest):
39 def test_list(self): 23 def test_list(self):
40 '''check if get_meta returns all the expected meta''' 24 '''check if get_meta returns all the expected meta'''
41 for clean, dirty in self.file_list: 25 for clean, dirty in self.file_list:
42 meta_list = dict() #FIXME 26 meta_list = dict("fixme":"please",) #FIXME
43 self.assertDictEqual(mat.file(dirty).get_meta(), meta_list) 27 self.assertDictEqual(mat.file(dirty).get_meta(), meta_list)
44 28
45 def testlist_list_empty(self): 29 def testlist_list_empty(self):
@@ -48,7 +32,7 @@ class Test_List(MATTest):
48 self.assertEqual(mat.file(clean).get_meta(), None) 32 self.assertEqual(mat.file(clean).get_meta(), None)
49 33
50 34
51class Test_isClean(MATTest): 35class Test_isClean_cli(test.MATTest):
52 def test_clean(self): 36 def test_clean(self):
53 '''test is_clean on clean files''' 37 '''test is_clean on clean files'''
54 for clean, dirty in self.file_list: 38 for clean, dirty in self.file_list:
diff --git a/dirty.jpg b/dirty.jpg
new file mode 100644
index 0000000..09c9161
--- /dev/null
+++ b/dirty.jpg
Binary files differ
diff --git a/lib/hachoir_editor/__init__.py b/hachoir_editor/__init__.py
index b106278..b106278 100644
--- a/lib/hachoir_editor/__init__.py
+++ b/hachoir_editor/__init__.py
diff --git a/hachoir_editor/__init__.pyc b/hachoir_editor/__init__.pyc
new file mode 100644
index 0000000..23f4a8e
--- /dev/null
+++ b/hachoir_editor/__init__.pyc
Binary files differ
diff --git a/lib/hachoir_editor/field.py b/hachoir_editor/field.py
index 6b1efe3..6b1efe3 100644
--- a/lib/hachoir_editor/field.py
+++ b/hachoir_editor/field.py
diff --git a/hachoir_editor/field.pyc b/hachoir_editor/field.pyc
new file mode 100644
index 0000000..ff7b91d
--- /dev/null
+++ b/hachoir_editor/field.pyc
Binary files differ
diff --git a/lib/hachoir_editor/fieldset.py b/hachoir_editor/fieldset.py
index 1669b5a..1669b5a 100644
--- a/lib/hachoir_editor/fieldset.py
+++ b/hachoir_editor/fieldset.py
diff --git a/hachoir_editor/fieldset.pyc b/hachoir_editor/fieldset.pyc
new file mode 100644
index 0000000..f4e37fa
--- /dev/null
+++ b/hachoir_editor/fieldset.pyc
Binary files differ
diff --git a/lib/hachoir_editor/typed_field.py b/hachoir_editor/typed_field.py
index 4abc989..4abc989 100644
--- a/lib/hachoir_editor/typed_field.py
+++ b/hachoir_editor/typed_field.py
diff --git a/hachoir_editor/typed_field.pyc b/hachoir_editor/typed_field.pyc
new file mode 100644
index 0000000..3d442ed
--- /dev/null
+++ b/hachoir_editor/typed_field.pyc
Binary files differ
diff --git a/lib/check/images/jpg.py b/lib/check/images/jpg.py
deleted file mode 100644
index 7f29587..0000000
--- a/lib/check/images/jpg.py
+++ /dev/null
@@ -1,17 +0,0 @@
1import hachoir_core.error
2import hachoir_core.cmd_line
3import hachoir_parser
4import hachoir_metadata
5import sys
6import mat
7
8
9class JpegStripper(file):
10 def checkField(self, field):
11 print(field.description)
12 if field.name.startswith("comment"):
13 return True
14 return field.name in ("photoshop", "exif", "adobe")
15 return False
16
17
diff --git a/lib/mat.py b/lib/mat.py
deleted file mode 100644
index d22c9ab..0000000
--- a/lib/mat.py
+++ /dev/null
@@ -1,104 +0,0 @@
1import hachoir_core.error
2import hachoir_core.cmd_line
3import hachoir_parser
4import hachoir_metadata
5
6from strippers import *
7
8from hachoir_editor import (createEditor,
9 NewFieldSet, EditableInteger, EditableBytes)
10
11import hachoir_editor
12
13import sys
14
15__version__ = "0.1"
16__author__ = "jvoisin"
17
18
19class file():
20 def __init__(self, filename):
21 self.metadata = {}
22 self.clean = False
23 self.editor = createEditor(self.parser)
24 self.filename = filename
25 self.filename, self.realname = hachoir_core.cmd_line.unicodeFilename(
26 self.filename), self.filename
27 self.parser = hachoir_parser.createParser(self.filename, self.realname)
28
29 if not self.parser:
30 print("Unable to parse file : sorry")
31 sys.exit(1)
32
33 try:
34 self.meta = hachoir_metadata.extractMetadata(self.parser)
35 except hachoir_core.error.HachoirError, err:
36 print "Metadata extraction error: %s" % unicode(err)
37 self.data = None
38
39 if not self.meta:
40 print "Unable to extract metadata"
41 sys.exit(1)
42
43 def is_clean(self):
44 '''
45 Return true if the file is clean from any compromizing meta
46 '''
47 return self.clean
48
49 def remove_all(self):
50 '''
51 Remove all the files that are compromizing
52 '''
53 stripEditor(self.editor, self.realname, level, not(values.quiet))
54 for key, field in metadata:
55 if should_remove(key):
56 remove(self, key)
57
58 def remove(self, field):
59 '''
60 Remove the given file
61 '''
62 del editor[field]
63 return True
64
65
66 def get_meta(self):
67 '''return a dict with all the meta of the file'''
68 #FIXME : sooooooooooo dirty !
69 for title in self.meta:
70 if title.values != []: #if the field is not empty
71 value = ""
72 for item in title.values:
73 value = item.text
74 self.metadata[title.key] = value
75 return self.metadata
76
77 def should_remove(self, field):
78 '''
79 return True if the field is compromizing
80 abstract method
81 '''
82 raise NotImplementedError()
83
84def stripEditor(editor, filename, realname, level, verbose):
85 '''
86 Assign a stripper to an editor
87 '''
88 cls = editor.input.__class__
89 try:
90 stripper_cls = strippers[cls]
91 except KeyError:
92 print "Don't have stripper for file type: %s" % editor.description
93 return False
94 stripper = stripper_cls(editor, level, verbose)
95
96 if stripper():
97 output = FileOutputStream(filename, realname)
98 editor.writeInto(output)
99
100 else:
101 print _("Stripper doesn't touch the file")
102 return True
103
104file(sys.argv[1]).get_meta()
diff --git a/lib/strippers.py b/lib/strippers.py
deleted file mode 100644
index 70d0fc7..0000000
--- a/lib/strippers.py
+++ /dev/null
@@ -1,3 +0,0 @@
1strippers = {
2 JpegFile: JpegStripper,
3}
diff --git a/libtest.py b/libtest.py
new file mode 100644
index 0000000..191a981
--- /dev/null
+++ b/libtest.py
@@ -0,0 +1,56 @@
1import mat
2import unittest
3import test
4
class Test_Remove_lib(test.MATTest):
    '''Removal of the metadata through the library interface.'''

    def _remove_then_check(self, path):
        '''Strip the file at "path" and assert that it is reported clean'''
        current_file = mat.create_class_file(path)
        current_file.remove_all()
        self.assertTrue(current_file.is_clean())

    def test_remove(self):
        '''make sure that the lib remove all compromizing meta'''
        for _, dirty in self.file_list:
            self._remove_then_check(dirty)

    def test_remove_empty(self):
        '''Test removal with clean files'''
        for clean, _ in self.file_list:
            self._remove_then_check(clean)
19
20
class Test_List_lib(test.MATTest):
    '''Listing of the metadata through the library interface.'''

    def test_list(self):
        '''check if get_meta returns all the expected meta'''
        # FIXME: placeholder, to be replaced by the real expected metadata
        expected = {"fixme": "please"}
        for _, dirty in self.file_list:
            listed = mat.create_class_file(dirty).get_meta()
            self.assertDictEqual(listed, expected)

    def testlist_list_empty(self):
        '''check that a listing of a clean file return an empty dict'''
        for clean, _ in self.file_list:
            listed = mat.create_class_file(clean).get_meta()
            self.assertEqual(listed, {})  # dirty, isn't it ?
34
35
class Test_isClean_lib(test.MATTest):
    '''
        Check is_clean() against both kinds of sample files.

        The two tests need distinct names: when both were called
        test_clean, the second definition replaced the first one and
        only one of them was ever run.
    '''

    def test_clean(self):
        '''test is_clean on clean files'''
        for clean, _ in self.file_list:
            current_file = mat.create_class_file(clean)
            self.assertTrue(current_file.is_clean())

    def test_dirty(self):
        '''test is_clean on dirty files'''
        for _, dirty in self.file_list:
            current_file = mat.create_class_file(dirty)
            self.assertFalse(current_file.is_clean())
48
49
if __name__ == '__main__':
    # unittest.makeSuite() is deprecated (and removed in Python 3.13);
    # TestLoader.loadTestsFromTestCase() builds the same per-class suites.
    loader = unittest.TestLoader()
    suite = unittest.TestSuite()
    for case in (Test_Remove_lib, Test_List_lib, Test_isClean_lib):
        suite.addTest(loader.loadTestsFromTestCase(case))
    unittest.TextTestRunner(verbosity=2).run(suite)
56
diff --git a/mat.py b/mat.py
new file mode 100644
index 0000000..8a3afcd
--- /dev/null
+++ b/mat.py
@@ -0,0 +1,117 @@
1import hachoir_core.error
2import hachoir_core.cmd_line
3import hachoir_parser
4import hachoir_metadata
5import hachoir_editor
6
7import sys
8import os
9import hachoir_parser.image
10
11__version__ = "0.1"
12__author__ = "jvoisin"
13
14
class file(object):
    '''
        Base class of the strippers: wraps one parsed file and exposes
        its metadata.

        Subclasses must implement _should_remove() to tell which
        metadata fields are compromizing.
    '''

    def __init__(self, realname, filename, parser, editor):
        '''
            Store the names, parser and editor of the file, then extract
            its metadata into self.meta.

            realname is the name as given by the caller and filename its
            unicode form (see create_class_file). The program exits when
            no metadata can be extracted.
        '''
        self.clean = False
        self.filename = filename
        self.realname = realname
        self.parser = parser
        self.editor = editor
        self.meta = self.__fill_meta()

    def __fill_meta(self):
        '''
            Return a dict mapping each non-empty metadata key of the file
            to the text of its last value.

            Print an error and exit with status 1 when the extraction
            fails or yields nothing.
        '''
        try:
            meta = hachoir_metadata.extractMetadata(self.parser)
        except hachoir_core.error.HachoirError as err:
            print("Metadata extraction error: %s" % err)
            # without this, "meta" is unbound below and the error path
            # dies on a NameError instead of exiting cleanly
            meta = None

        if not meta:
            print("Unable to extract metadata from the file %s" % self.filename)
            sys.exit(1)

        metadata = {}
        for title in meta:
            # FIXME: only the last value of a multi-valued field is kept
            # (assumes title.values is a list -- it was compared to []
            # in the previous version of this code)
            if title.values:  # if the field is not empty
                metadata[title.key] = title.values[-1].text
        return metadata

    def is_clean(self):
        '''
            Return true if the file is clean from any compromizing meta
        '''
        return self.clean

    def remove_all(self):
        '''
            Walk the metadata fields and handle the compromizing ones.

            Work in progress: nothing is removed yet, a debug marker is
            printed for every field flagged by _should_remove().
        '''
        for key in self.meta:
            if self._should_remove(key):
                print("BLEH")  # DEBUG
                # TODO: call self.__remove(key) and set self.clean to True
                # once the removal itself is working

    def __remove(self, field):
        '''
            Remove the given field from the editor
        '''
        del self.editor[field]

    def get_meta(self):
        '''
            return a dict with all the meta of the file
        '''
        return self.meta

    def _should_remove(self, field):
        '''
            return True if the field is compromizing
            abstract method
        '''
        raise NotImplementedError()
80
class JpegStripper(file):
    '''Stripper used for the JPEG files.'''

    def _should_remove(self, field):
        '''
            return True if the field is compromizing
            (for now, no field is ever flagged)
        '''
        return False


# maps a parser class to the stripper class handling that type of file
strippers = {}
strippers[hachoir_parser.image.JpegFile] = JpegStripper
88
def create_class_file(name):
    '''
        return a $FILETYPEStripper() class,
        corresponding to the filetype of the given file

        Print an error and exit with status 1 when "name" is not an
        existing file, when it can not be parsed, or when no stripper is
        registered for its type.
    '''
    if not os.path.isfile(name):  # check if the file exist
        print("Error: %s is not a valid file" % name)
        sys.exit(1)

    realname = name
    filename = hachoir_core.cmd_line.unicodeFilename(name)
    parser = hachoir_parser.createParser(filename, realname)
    if not parser:
        print("Unable to parse the file %s : sorry" % filename)
        sys.exit(1)

    editor = hachoir_editor.createEditor(parser)

    # this part is a little tricky :
    # stripper_class receives the class $FILETYPEStripper (which inherits
    # from the "file" class), looked up from the class of the editor's
    # input for the given file (name)
    try:
        stripper_class = strippers[editor.input.__class__]
    except KeyError:
        print("Don't have stripper for file type: %s" % editor.description)
        sys.exit(1)
    return stripper_class(realname, filename, parser, editor)
diff --git a/test.py b/test.py
new file mode 100644
index 0000000..3138be7
--- /dev/null
+++ b/test.py
@@ -0,0 +1,29 @@
1'''
2 Class for the testing suite :
3 - get the list of all test files
4 - create a copy of them on start
5 - remove the copy on end
6'''
7
8import shutil
9import glob
10import tempfile
11import unittest
12import mat
13
# Both globs are sorted so that the clean/dirty files are paired in a
# deterministic way (glob returns names in arbitrary order), and the
# pairs are stored in a list so that every setUp() call can walk them
# again (on Python 3, zip() is an iterator that can be consumed only once).
FILE_LIST = list(zip(sorted(glob.glob('clean*')), sorted(glob.glob('dirty*'))))


class MATTest(unittest.TestCase):
    '''
        Base class of the test cases: gives every test its own working
        copies of the sample files, listed in self.file_list as
        (clean, dirty) tuples of paths.
    '''

    def setUp(self):
        '''create working copy of the clean and the dirty file in the TMP dir'''
        # imported here so that this class does not rely on the import
        # block of the module
        import os

        self.file_list = []
        self.tmpdir = tempfile.mkdtemp()

        for clean, dirty in FILE_LIST:
            # join the paths: "tmpdir + name" has no separator, so the
            # copies were created next to the temporary folder and were
            # never removed by tearDown()
            clean_copy = os.path.join(self.tmpdir, os.path.basename(clean))
            dirty_copy = os.path.join(self.tmpdir, os.path.basename(dirty))
            shutil.copy2(clean, clean_copy)
            shutil.copy2(dirty, dirty_copy)
            self.file_list.append((clean_copy, dirty_copy))

    def tearDown(self):
        '''Remove the tmp folder'''
        shutil.rmtree(self.tmpdir)