1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
|
#!/usr/bin/env python
''' Metadata anonymisation toolkit library
'''
import logging
import mimetypes
import os
import subprocess
import xml.sax
import hachoir_core.cmd_line
import hachoir_parser
import MAT.exceptions
# Package metadata.
__version__ = '0.5'
__author__ = 'jvoisin'
# Silent configuration (the active one): the logging threshold is CRITICAL
# and hachoir's "quiet" flag is switched on.
LOGGING_LEVEL = logging.CRITICAL
hachoir_core.config.quiet = True
# File name handed to logging.basicConfig() below; left empty here.
fname = ''
# Verbose configuration: uncomment the three lines below to enable it
# (being assigned later, they override the silent values above).
#LOGGING_LEVEL = logging.DEBUG
#hachoir_core.config.quiet = False
#fname = 'report.log'
logging.basicConfig(filename=fname, level=LOGGING_LEVEL)
import strippers # this is loaded here because we need LOGGING_LEVEL
def get_logo():
    ''' Return the path to the logo

        The known locations are probed in a fixed order (local ./data
        first, then the two system-wide pixmaps directories) and the
        first one that is an existing file is returned.
        None is returned when the logo is found nowhere.
    '''
    candidates = (
        './data/mat.png',
        '/usr/share/pixmaps/mat.png',
        '/usr/local/share/pixmaps/mat.png',
    )
    for candidate in candidates:
        if os.path.isfile(candidate):
            return candidate
    return None
def get_datadir():
    ''' Return the path to the data directory

        The known locations are probed in a fixed order (local ./data/
        first, then /usr/local/share/mat/, then /usr/share/mat/) and the
        first one that is an existing directory is returned.
        None is returned when no data directory is found.
    '''
    candidates = (
        './data/',
        '/usr/local/share/mat/',
        '/usr/share/mat/',
    )
    for candidate in candidates:
        if os.path.isdir(candidate):
            return candidate
    return None
def list_supported_formats():
    ''' Return a list of all locally supported fileformat.

        The FORMATS file of the data directory is parsed with XMLParser,
        then only the entries whose first mimetype (the 'mimetype' value
        may hold several, comma-separated) has a registered stripper in
        strippers.STRIPPERS are kept.
    '''
    sax_handler = XMLParser()
    sax_reader = xml.sax.make_parser()
    sax_reader.setContentHandler(sax_handler)

    formats_path = os.path.join(get_datadir(), 'FORMATS')
    with open(formats_path, 'r') as formats_file:
        sax_reader.parse(formats_file)

    return [
        entry for entry in sax_handler.list
        if entry['mimetype'].split(',')[0] in strippers.STRIPPERS
    ]
class XMLParser(xml.sax.handler.ContentHandler):
    ''' Parse the supported format xml, and return a corresponding
        list of dict

        Each closed <format> element adds one dict to self.list; the dict
        maps the name of every tag closed inside that element to its text,
        with every literal backslash-s sequence replaced by a space.
    '''
    def __init__(self):
        # Explicit base-class call so that whatever state the base
        # initialiser sets up exists on this handler too.
        xml.sax.handler.ContentHandler.__init__(self)
        self.dict = {}  # fields of the <format> currently being read
        self.list = []  # one dict per completed <format>
        self.content, self.key = '', ''  # text and name of the current tag
        self.between = False  # True while inside an opened tag

    def startElement(self, name, attrs):
        ''' Called when entering into xml tag
        '''
        self.between = True
        self.key = name
        self.content = ''

    def endElement(self, name):
        ''' Called when exiting a xml tag
        '''
        if name == 'format':  # leaving a fileformat section
            self.list.append(self.dict.copy())
            self.dict.clear()
        else:
            # Raw string: the marker is a literal backslash followed by 's',
            # not a regex class; '\s' in a normal literal is an invalid
            # escape sequence.
            content = self.content.replace(r'\s', ' ')
            self.dict[self.key] = content
        self.between = False

    def characters(self, characters):
        ''' Concatenate the content between opening and closing tags
        '''
        # Text arriving after a closing tag (self.between is False) is
        # dropped.
        if self.between:
            self.content += characters
def secure_remove(filename):
    ''' Securely remove the file

        The file is first given full permissions, then handed to the
        external "shred --remove" command. When shred can not be run, or
        exits with a non-zero status, a plain os.remove() is used as a
        fallback.

        Return True once the file has been removed (by either mean).
        Raise MAT.exceptions.UnableToWriteFile if the permissions can not
        be changed, and MAT.exceptions.UnableToRemoveFile if the fallback
        removal fails too.
    '''
    # I want the file removed, even if it's ro
    try:
        os.chmod(filename, 0o777)
    except OSError:
        logging.error('Unable to add write rights to %s', filename)
        raise MAT.exceptions.UnableToWriteFile

    try:
        # '--' marks the end of the options, so that a filename starting
        # with a dash is treated as a file and not as a shred option.
        shredded = subprocess.call(['shred', '--remove', '--', filename]) == 0
    except OSError:  # shred could not be executed at all
        shredded = False
    if shredded:
        return True
    logging.error('Unable to securely remove %s', filename)

    # Fallback: plain (non-secure) removal
    try:
        os.remove(filename)
    except OSError:
        logging.error('Unable to remove %s', filename)
        raise MAT.exceptions.UnableToRemoveFile
    return True
def create_class_file(name, backup, **kwargs):
    ''' Return a $FILETYPEStripper() class,
        corresponding to the filetype of the given file

        name is the path of the file to handle; backup and the extra
        keyword arguments are passed as-is to the stripper constructor.

        None is returned (and the reason logged) when the path is not a
        regular file, is not readable, is empty, can not be parsed by
        hachoir, or when no stripper is registered for its mimetype.
    '''
    if not os.path.isfile(name):  # check if the file exists
        logging.error('%s is not a valid file', name)
        return None

    if not os.access(name, os.R_OK):  # check read permissions
        logging.error('%s is not readable', name)
        return None

    if not os.path.getsize(name):
        # check if the file is not empty (hachoir crash on empty files)
        logging.error('%s is empty', name)
        return None

    try:
        filename = hachoir_core.cmd_line.unicodeFilename(name)
    except TypeError:  # get rid of "decoding Unicode is not supported"
        filename = name

    parser = hachoir_parser.createParser(filename)
    if not parser:
        logging.info('Unable to parse %s', filename)
        return None

    mime = parser.mime_type

    if mime == 'application/zip':  # some formats are zipped stuff
        # rely on the file name to tell which zipped format this is
        guessed_mime = mimetypes.guess_type(name)[0]
        if guessed_mime:
            mime = guessed_mime

    # every flavour of a document family shares a single stripper key
    if mime.startswith('application/vnd.oasis.opendocument'):
        mime = 'application/opendocument'  # opendocument fileformat
    elif mime.startswith('application/vnd.openxmlformats-officedocument'):
        mime = 'application/officeopenxml'  # office openxml

    is_writable = os.access(name, os.W_OK)

    try:
        stripper_class = strippers.STRIPPERS[mime]
    except KeyError:
        logging.info('Don\'t have stripper for %s format', mime)
        return None

    return stripper_class(filename, parser, mime, backup, is_writable, **kwargs)
|