summaryrefslogtreecommitdiff
path: root/mat.py
diff options
context:
space:
mode:
Diffstat (limited to 'mat.py')
-rwxr-xr-xmat.py141
1 files changed, 0 insertions, 141 deletions
diff --git a/mat.py b/mat.py
deleted file mode 100755
index 200fc04..0000000
--- a/mat.py
+++ /dev/null
@@ -1,141 +0,0 @@
1#!/usr/bin/python
2
3'''
4 Metadata anonymisation toolkit library
5'''
6
7import hachoir_core.error
8import hachoir_core.field
9import hachoir_core.cmd_line
10import hachoir_parser
11import hachoir_metadata
12import hachoir_editor
13
14import sys
15import os
16import hachoir_parser.image
17
# Version and author of the library, exposed as module metadata.
__version__ = "0.1"
__author__ = "jvoisin"

# Suffix appended to a file's name to build the path of its cleaned copy.
POSTFIX = ".cleaned"
22
class file(object):
    '''
        Base class of the strippers: wraps one parsed file, the metadata
        extracted from it, and the editor used to remove fields.

        Subclasses must implement _should_remove().

        NOTE: the name shadows the Python 2 builtin "file"; it is kept
        unchanged because other code refers to this class by that name.
    '''
    def __init__(self, realname, filename, parser, editor):
        '''
            realname: name of the file, as given by the caller
            filename: name used in messages and to build the output path
            parser:   parser object handed to hachoir_metadata
            editor:   editable view of the file; iterating it yields fields
        '''
        self.filename = filename
        self.realname = realname
        self.parser = parser
        self.editor = editor
        self.meta = self.__fill_meta()

    def __fill_meta(self):
        '''
            Build a dict {key: text} from the metadata of the parsed file.

            Print a message and exit with status 1 when no metadata
            can be extracted.
        '''
        try:
            meta = hachoir_metadata.extractMetadata(self.parser)
        except hachoir_core.error.HachoirError as err:
            print("Metadata extraction error: %s" % err)
            # Without this, "meta" would be unbound below and the check
            # would die with a NameError instead of a clean exit.
            meta = None

        if not meta:
            print("Unable to extract metadata from the file %s" % self.filename)
            sys.exit(1)

        metadata = {}
        for title in meta:
            if title.values != []:  # skip the empty fields
                # Only the text of the last value is kept.
                value = ""
                for item in title.values:
                    value = item.text
                metadata[title.key] = value
        return metadata

    def is_clean(self):
        '''
            Check if the file is clean from harmful metadatas:
            return False as soon as one field should be removed
        '''
        for field in self.editor:
            if self._should_remove(field):
                return False
        return True

    def remove_all(self):
        '''
            Remove all the fields that are compromizing, then write
            the result into a new file named filename + POSTFIX
        '''
        for field in self.editor:
            if self._should_remove(field):
                self._remove(field)
        hachoir_core.field.writeIntoFile(self.editor, self.filename + POSTFIX)

    def _remove(self, field):
        '''
            Remove the given field from the editor
        '''
        del self.editor[field.name]

    def get_meta(self):
        '''
            return a dict with all the meta of the file
        '''
        return self.meta

    def _should_remove(self, key):
        '''
            return True if the field is compromizing
            abstract method: must be overridden by the subclasses
        '''
        raise NotImplementedError()
90
class JpegStripper(file):
    '''
        Stripper flagging the comment fields and the "photoshop",
        "exif" and "adobe" fields for removal
    '''
    def _should_remove(self, field):
        '''
            return True if the given field is compromizing
        '''
        name = field.name
        return name.startswith('comment') or name in ("photoshop", "exif", "adobe")
99
class PngStripper(file):
    '''
        Stripper flagging the "comment" field for removal
    '''
    def _should_remove(self, field):
        '''
            return True if the given field is compromizing
        '''
        # Compare for equality: ('comment') without a trailing comma is a
        # plain string, so "in" was a substring test that also matched
        # names such as "c", "omm" or "".
        return field.name == 'comment'
106
# Dispatch table: class of the parsed input -> stripper class to use for it.
strippers = {
    hachoir_parser.image.JpegFile: JpegStripper,
    hachoir_parser.image.PngFile: PngStripper,
}
111
def create_class_file(name):
    '''
        return a $FILETYPEStripper() instance,
        corresponding to the filetype of the given file

        name: path of the file to process

        Print a message and exit with status 1 if the path is not a
        regular file, if the file can not be parsed, or if there is no
        stripper for its type.
    '''
    if not os.path.isfile(name):  # check if the file exists
        print("Error: %s is not a valid file" % name)
        sys.exit(1)

    realname = name
    filename = hachoir_core.cmd_line.unicodeFilename(name)
    parser = hachoir_parser.createParser(filename)
    if not parser:
        print("Unable to parse the file %s : sorry" % filename)
        sys.exit(1)

    editor = hachoir_editor.createEditor(parser)
    # This part is a little tricky: stripper_class receives the
    # $FILETYPEStripper class (which inherits from the "file" class)
    # registered in "strippers" for the class of the editor's input.
    try:
        stripper_class = strippers[editor.input.__class__]
    except KeyError:
        # Place for another lib than hachoir
        print("Don't have stripper for file type: %s" % editor.description)
        sys.exit(1)
    return stripper_class(realname, filename, parser, editor)