1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
|
#!/usr/bin/env python
'''
Metadata anonymisation toolkit library
'''
import os
import subprocess
import logging
import mimetypes
import xml.sax
import hachoir_core.cmd_line
import hachoir_parser
import strippers
# Package metadata.
__version__ = '0.3.4'
__author__ = 'jvoisin'
# Silent settings: only CRITICAL records pass the level filter, hachoir is
# asked to be quiet, and the filename later given to basicConfig() is empty.
# NOTE(review): hachoir_core.config is not imported explicitly in this file;
# presumably it is loaded as a side effect of importing
# hachoir_core.cmd_line -- confirm.
LOGGING_LEVEL = logging.CRITICAL
hachoir_core.config.quiet = True
fname = ''
# Verbose settings.
# NOTE(review): this assignment runs after the silent one above, so the
# effective level is DEBUG while hachoir stays quiet -- confirm whether it
# is a debugging leftover that should be commented out again.
LOGGING_LEVEL = logging.DEBUG
#hachoir_core.config.quiet = False
# NOTE(review): the commented-out line below sets 'logname', but
# basicConfig() reads 'fname'; uncommenting it alone would not change the
# log destination.
#logname = 'report.log'
# Configure the root logger once, at import time, with the values above.
logging.basicConfig(filename=fname, level=LOGGING_LEVEL)
def get_logo():
    '''
        Return the path of the mat.png logo.

        The candidates are checked in order: the local ./data directory,
        then /usr/share/pixmaps, then /usr/local/share/pixmaps. The first
        one that is an existing file is returned; None is returned when
        none of them exists.
    '''
    candidates = (
        './data/mat.png',
        '/usr/share/pixmaps/mat.png',
        '/usr/local/share/pixmaps/mat.png',
    )
    for candidate in candidates:
        if os.path.isfile(candidate):
            return candidate
    return None
def get_formats():
    '''
        Return the path of the FORMATS file.

        A copy in the local ./data directory is preferred, then the one in
        /usr/share/mat, then the one in /usr/local/share/mat. None is
        returned when no candidate is an existing file.
    '''
    local_copy = './data/FORMATS'
    if os.path.isfile(local_copy):
        return local_copy

    system_copy = '/usr/share/mat/FORMATS'
    if os.path.isfile(system_copy):
        return system_copy

    site_copy = '/usr/local/share/mat/FORMATS'
    if os.path.isfile(site_copy):
        return site_copy

    return None
class XMLParser(xml.sax.handler.ContentHandler):
    '''
        Parse the supported formats xml, and build the corresponding
        list of dict.

        Every element closed inside a <format> section is stored in
        self.dict under the name of the most recently opened element;
        each closing </format> tag appends a copy of that dict to
        self.list and starts a fresh one. The result is read from
        self.list once parsing is done.
    '''
    def __init__(self):
        # Explicit base-class call (instead of super()) so that it also
        # works where ContentHandler is an old-style class.
        xml.sax.handler.ContentHandler.__init__(self)
        self.dict = {}   # fields of the <format> section being parsed
        self.list = []   # one dict per completed <format> section
        self.content, self.key = '', ''
        self.between = False  # True while inside an opened element

    def startElement(self, name, attrs):
        '''
            Called when entering a xml tag: remember its name and
            start collecting its text from scratch.
        '''
        self.between = True
        self.key = name
        self.content = ''

    def endElement(self, name):
        '''
            Called when exiting a xml tag: either close the current
            <format> section, or store the text collected so far.
        '''
        if name == 'format':  # exiting a fileformat section
            self.list.append(self.dict.copy())
            self.dict.clear()
        else:
            # The FORMATS file spells spaces as a literal backslash
            # followed by 's'. The backslash is escaped here because
            # '\s' is not a valid string escape sequence.
            self.dict[self.key] = self.content.replace('\\s', ' ')
        self.between = False

    def characters(self, characters):
        '''
            Concatenate the content between opening and closing tags
        '''
        if self.between:
            self.content += characters
def secure_remove(filename):
    '''
        Securely remove the file.

        The file is first handed to `shred --remove`. When shred cannot
        be executed, or exits with a non-zero status, the failure is
        logged and a plain os.remove() is attempted instead, so that the
        file does not silently stay on disk.

        Nothing is returned and nothing is raised: failures are only
        reported through the logging module.
    '''
    try:
        # subprocess.call() returns the exit status; it only raises
        # OSError when the shred binary itself cannot be launched.
        shredded = subprocess.call(['shred', '--remove', filename]) == 0
    except OSError:
        shredded = False

    if shredded:
        return

    logging.error('Unable to securely remove %s', filename)
    try:
        os.remove(filename)
    except OSError:
        logging.error('Unable to remove %s', filename)
def create_class_file(name, backup, **kwargs):
    '''
        Return a $FILETYPEStripper() instance, corresponding to the
        filetype of the given file.

        name is the path of the file to process; backup and any extra
        keyword arguments are passed through unchanged to the stripper
        constructor.

        None is returned (and the reason is logged) when the path is not
        a regular file, is not readable or not writable, cannot be parsed
        by hachoir, or has a mime type missing from strippers.STRIPPERS.
    '''
    if not os.path.isfile(name):
        # check if the file exists
        logging.error('%s is not a valid file', name)
        return None

    if not os.access(name, os.R_OK):
        # check read permissions
        logging.error('%s is not readable', name)
        return None

    if not os.access(name, os.W_OK):
        # check write permission
        logging.error('%s is not writable', name)
        return None

    try:
        filename = hachoir_core.cmd_line.unicodeFilename(name)
    except TypeError:  # get rid of "decoding Unicode is not supported"
        filename = name

    parser = hachoir_parser.createParser(filename)
    if not parser:
        logging.info('Unable to parse %s', filename)
        return None

    mime = parser.mime_type

    if mime == 'application/zip':  # some formats are zipped stuff
        # Refine the type from the file extension. guess_type() yields
        # None for an unknown or missing extension; in that case keep
        # the type detected by the parser instead of crashing below.
        mime = mimetypes.guess_type(name)[0] or mime

    if mime.startswith('application/vnd.oasis.opendocument'):
        mime = 'application/opendocument'  # opendocument fileformat
    elif mime.startswith('application/vnd.openxmlformats-officedocument'):
        mime = 'application/officeopenxml'  # office openxml

    try:
        stripper_class = strippers.STRIPPERS[mime]
    except KeyError:
        logging.info('Don\'t have stripper for %s format', mime)
        return None

    return stripper_class(filename, parser, mime, backup, **kwargs)
|