summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorjvoisin2011-06-20 01:25:33 +0200
committerjvoisin2011-06-20 01:25:33 +0200
commit52f2fedd5d73807d42ba5c397c3e4c5348b47a47 (patch)
tree09070e23428f3f72e4a95da8df33520adacbf01f /lib
parentde5917e5f01374bb1a647f49ae85283241a2bea9 (diff)
Introduction of a nice separation of functions/classes into different files
Diffstat (limited to '')
-rw-r--r--lib/__init__.py1
-rw-r--r--lib/images.py17
-rw-r--r--[-rwxr-xr-x]lib/mat.py99
-rw-r--r--lib/parser.py79
4 files changed, 102 insertions, 94 deletions
diff --git a/lib/__init__.py b/lib/__init__.py
index e69de29..8b13789 100644
--- a/lib/__init__.py
+++ b/lib/__init__.py
@@ -0,0 +1 @@
diff --git a/lib/images.py b/lib/images.py
new file mode 100644
index 0000000..21229c2
--- /dev/null
+++ b/lib/images.py
@@ -0,0 +1,17 @@
1import parser
2
class JpegStripper(parser.Generic_parser):
    '''
        Generic_parser specialisation that selects which fields
        of a file must be stripped
    '''
    def _should_remove(self, field):
        '''
            return True if the name of the given field starts
            with 'comment', or is one of "photoshop", "exif"
            or "adobe"; return False otherwise
        '''
        name = field.name
        return name.startswith('comment') or name in ("photoshop", "exif", "adobe")
11
class PngStripper(parser.Generic_parser):
    '''
        Generic_parser specialisation that selects which fields
        of a file must be stripped
    '''
    def _should_remove(self, field):
        '''
            return True if the given field is named exactly
            'comment', False otherwise
        '''
        # The trailing comma matters: ('comment') is just the string
        # 'comment', so "in" would be a substring test matching names
        # such as '', 'c' or 'omm'. ('comment',) is a real tuple.
        return field.name in ('comment',)
diff --git a/lib/mat.py b/lib/mat.py
index 48b83fb..5641c62 100755..100644
--- a/lib/mat.py
+++ b/lib/mat.py
@@ -3,111 +3,22 @@
3''' 3'''
4 Metadata anonymisation toolkit library 4 Metadata anonymisation toolkit library
5''' 5'''
6
6import sys 7import sys
7import os 8import os
8 9
9import hachoir_core.error
10import hachoir_core.field
11import hachoir_core.cmd_line 10import hachoir_core.cmd_line
12import hachoir_parser 11import hachoir_parser
13import hachoir_metadata
14import hachoir_parser.image
15
16sys.path.append('..')
17import hachoir_editor 12import hachoir_editor
18 13
14import images
15
19__version__ = "0.1" 16__version__ = "0.1"
20__author__ = "jvoisin" 17__author__ = "jvoisin"
21 18
22POSTFIX = ".cleaned"
23
24class file():
25 def __init__(self, realname, filename, parser, editor):
26 self.meta = {}
27 self.filename = filename
28 self.realname = realname
29 self.parser = parser
30 self.editor = editor
31 self.meta = self.__fill_meta()
32
33 def __fill_meta(self):
34 metadata = {}
35 try:
36 meta = hachoir_metadata.extractMetadata(self.parser)
37 except hachoir_core.error.HachoirError, err:
38 print("Metadata extraction error: %s" % err)
39
40 if not meta:
41 print("Unable to extract metadata from the file %s" % self.filename)
42 sys.exit(1)
43
44 for title in meta:
45 #fixme i'm so dirty
46 if title.values != []: #if the field is not empty
47 value = ""
48 for item in title.values:
49 value = item.text
50 metadata[title.key] = value
51 return metadata
52
53 def is_clean(self):
54 '''
55 Check if the file is clean from harmful metadatas
56 '''
57 for field in self.editor:
58 if self._should_remove(field):
59 return False
60 return True
61
62 def remove_all(self):
63 '''
64 Remove all the files that are compromizing
65 '''
66 for field in self.editor:
67 if self._should_remove(field):
68 self._remove(field)
69 hachoir_core.field.writeIntoFile(self.editor, self.filename + POSTFIX)
70
71 def _remove(self, field):
72 '''
73 Remove the given field
74 '''
75 del self.editor[field.name]
76
77
78 def get_meta(self):
79 '''
80 return a dict with all the meta of the file
81 '''
82 #am I useless ?
83 return self.meta
84
85 def _should_remove(self, key):
86 '''
87 return True if the field is compromizing
88 abstract method
89 '''
90 raise NotImplementedError()
91
92class JpegStripper(file):
93 def _should_remove(self, field):
94 if field.name.startswith('comment'):
95 return True
96 elif field.name in ("photoshop", "exif", "adobe"):
97 return True
98 else:
99 return False
100
101class PngStripper(file):
102 def _should_remove(self, field):
103 if field.name in ('comment'):
104 return True
105 else:
106 return False
107
108strippers = { 19strippers = {
109 hachoir_parser.image.JpegFile: JpegStripper, 20 hachoir_parser.image.JpegFile: images.JpegStripper,
110 hachoir_parser.image.PngFile: PngStripper, 21 hachoir_parser.image.PngFile: images.PngStripper,
111} 22}
112 23
113def create_class_file(name): 24def create_class_file(name):
diff --git a/lib/parser.py b/lib/parser.py
new file mode 100644
index 0000000..828648f
--- /dev/null
+++ b/lib/parser.py
@@ -0,0 +1,79 @@
1'''
2 Parent class of all parsers
3'''
4
5import hachoir_core.error
6import hachoir_parser
7import hachoir_metadata
8import hachoir_editor
9import sys
10
11POSTFIX = ".cleaned"
12
class Generic_parser(object):
    '''
        Parent class of all the strippers.

        Holds the parser and the editor of a file, extracts its
        metadata, and removes the fields that a subclass flags
        through _should_remove()
    '''
    def __init__(self, realname, filename, parser, editor):
        '''
            Store the given names, parser and editor, then fill
            self.meta with the metadata extracted from the parser
        '''
        self.filename = filename
        self.realname = realname
        self.parser = parser
        self.editor = editor
        self.meta = self.__fill_meta()

    def __fill_meta(self):
        '''
            Return a dict mapping every non-empty metadata key
            to the text of its last value.

            Print an error and exit with status 1 if no metadata
            could be extracted from the parser
        '''
        # meta must be bound even when the extraction raises, otherwise
        # the check below fails with a NameError instead of exiting cleanly
        meta = None
        try:
            meta = hachoir_metadata.extractMetadata(self.parser)
        except hachoir_core.error.HachoirError as err:
            print("Metadata extraction error: %s" % err)

        if not meta:
            print("Unable to extract metadata from the file %s" % self.filename)
            sys.exit(1)

        metadata = {}
        for title in meta:
            # skip the empty fields; when a field holds several values,
            # only the text of the last one is kept
            # (assumes title.values is a list - TODO confirm)
            if title.values:
                metadata[title.key] = title.values[-1].text
        return metadata

    def is_clean(self):
        '''
            Check if the file is clean from harmful metadatas:
            return False as soon as one field of the editor
            should be removed, True otherwise
        '''
        for field in self.editor:
            if self._should_remove(field):
                return False
        return True

    def remove_all(self):
        '''
            Remove all the fields that are compromizing, then
            write the result into the file named
            self.filename + POSTFIX
        '''
        # imported here because this module only imports hachoir_core.error
        # at top level, which does not guarantee that hachoir_core.field
        # is loaded
        import hachoir_core.field

        # snapshot the fields to remove before deleting anything, so that
        # the editor is never modified while it is being iterated
        harmful = [field for field in self.editor if self._should_remove(field)]
        for field in harmful:
            self._remove(field)
        hachoir_core.field.writeIntoFile(self.editor, self.filename + POSTFIX)

    def _remove(self, field):
        '''
            Remove the given field from the editor
        '''
        del self.editor[field.name]

    def get_meta(self):
        '''
            return a dict with all the meta of the file
        '''
        return self.meta

    def _should_remove(self, key):
        '''
            return True if the field is compromizing
            abstract method: must be overridden by the subclasses
        '''
        raise NotImplementedError()