summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
author: jvoisin, 2011-06-18 04:42:52 +0200
committer: jvoisin, 2011-06-18 04:42:52 +0200
commit: de5917e5f01374bb1a647f49ae85283241a2bea9 (patch)
tree: fbe5483af79965d1445bd4aaa528f0ad3e48a8aa /lib
parent: 0523e034870ed80cc3916ebb78552d661de4d3b0 (diff)
Creation of the arborescence
Diffstat (limited to 'lib')
-rw-r--r-- lib/__init__.py 0
-rwxr-xr-x lib/mat.py 142
2 files changed, 142 insertions, 0 deletions
diff --git a/lib/__init__.py b/lib/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/lib/__init__.py
diff --git a/lib/mat.py b/lib/mat.py
new file mode 100755
index 0000000..48b83fb
--- /dev/null
+++ b/lib/mat.py
@@ -0,0 +1,142 @@
1#!/usr/bin/python
2
3'''
4 Metadata anonymisation toolkit library
5'''
6import sys
7import os
8
9import hachoir_core.error
10import hachoir_core.field
11import hachoir_core.cmd_line
12import hachoir_parser
13import hachoir_metadata
14import hachoir_parser.image
15
16sys.path.append('..')
17import hachoir_editor
18
# Module identification
__author__ = "jvoisin"
__version__ = "0.1"

# Suffix appended to the original file name to build the output file name
POSTFIX = ".cleaned"
23
class file():
    '''
        Base class of every stripper: it wraps one parsed file together
        with its hachoir editor, and knows how to list and remove fields.

        Subclasses only have to implement _should_remove().

        NOTE: the class name shadows the Python 2 builtin "file"; it is
        kept as is because other blocks of this module inherit from it.
    '''
    def __init__(self, realname, filename, parser, editor):
        '''
            realname: name of the file, as given by the caller
            filename: name of the file, as passed to the hachoir parser
            parser: hachoir parser of the file
            editor: hachoir editor created from the parser

            The metadata are extracted right away; the process exits
            with status 1 if nothing can be extracted.
        '''
        self.filename = filename
        self.realname = realname
        self.parser = parser
        self.editor = editor
        self.meta = self.__fill_meta()

    def __fill_meta(self):
        '''
            Return a dict {key: text} built from the metadata of
            self.parser.
            Print an error and exit with status 1 if the metadata
            can not be extracted.
        '''
        try:
            meta = hachoir_metadata.extractMetadata(self.parser)
        except hachoir_core.error.HachoirError as err:
            print("Metadata extraction error: %s" % err)
            # without this, "meta" would be unbound below and the
            # failure would surface as a NameError
            meta = None

        if not meta:
            print("Unable to extract metadata from the file %s" % self.filename)
            sys.exit(1)

        metadata = {}
        for title in meta:
            if title.values:  # if the field is not empty
                # only the text of the last value is kept
                metadata[title.key] = title.values[-1].text
        return metadata

    def is_clean(self):
        '''
            Check if the file is clean from harmful metadatas:
            return False as soon as one field of the editor should be
            removed, True otherwise
        '''
        for field in self.editor:
            if self._should_remove(field):
                return False
        return True

    def remove_all(self):
        '''
            Remove all the fields that are compromizing, then write
            the result into the file named self.filename + POSTFIX
        '''
        for field in self.editor:
            if self._should_remove(field):
                self._remove(field)
        hachoir_core.field.writeIntoFile(self.editor, self.filename + POSTFIX)

    def _remove(self, field):
        '''
            Remove the given field from the editor
            (the field is looked up by its name)
        '''
        del self.editor[field.name]

    def get_meta(self):
        '''
            return a dict with all the meta of the file
        '''
        return self.meta

    def _should_remove(self, key):
        '''
            return True if the field is compromizing
            abstract method: is_clean() and remove_all() call it with
            each field of the editor, subclasses must override it
        '''
        raise NotImplementedError()
91
class JpegStripper(file):
    '''
        Stripper for the files parsed as JPEG
    '''
    def _should_remove(self, field):
        '''
            return True for the fields whose name starts with "comment",
            and for the "photoshop", "exif" and "adobe" fields
        '''
        name = field.name
        return name.startswith('comment') or name in ("photoshop", "exif", "adobe")
100
class PngStripper(file):
    '''
        Stripper for the files parsed as PNG
    '''
    def _should_remove(self, field):
        '''
            return True only for the field named exactly "comment"
        '''
        # The trailing comma matters: ('comment') is a plain string, and
        # "in" on a string is a substring test, so names like "c", "men"
        # or "" were wrongly reported as compromizing.
        return field.name in ('comment',)
107
# Registry: hachoir parser class -> stripper class able to clean this
# kind of file (looked up by create_class_file)
strippers = {}
strippers[hachoir_parser.image.JpegFile] = JpegStripper
strippers[hachoir_parser.image.PngFile] = PngStripper
112
def create_class_file(name):
    '''
        Build and return the $FILETYPEStripper() object matching the
        filetype of the file "name".

        Print an error and exit with status 1 when "name" is not an
        existing file, when hachoir can not parse it, or when there is
        no stripper registered for its type.
    '''
    # guard: the path must point to an existing regular file
    if not os.path.isfile(name):
        print("Error: %s is not a valid file" % name)
        sys.exit(1)

    unicode_name = hachoir_core.cmd_line.unicodeFilename(name)
    parser = hachoir_parser.createParser(unicode_name)
    if not parser:
        print("Unable to parse the file %s : sorry" % unicode_name)
        sys.exit(1)

    editor = hachoir_editor.createEditor(parser)
    try:
        # The stripper class (which inherits from the "file" class) is
        # picked from the "strippers" registry, based on the class of
        # the editor's input for the given file.
        stripper_class = strippers[editor.input.__class__]
    except KeyError:
        # Place for another lib than hachoir
        print("Don't have stripper for file type: %s" % editor.description)
        sys.exit(1)
    return stripper_class(name, unicode_name, parser, editor)