summaryrefslogtreecommitdiff
path: root/MAT/mat.py
diff options
context:
space:
mode:
authorjvoisin2012-12-08 02:02:25 +0100
committerjvoisin2012-12-13 14:24:01 +0100
commitcbf8a2a65928694202e19b6bcf56ec84bcbf613c (patch)
treee106475b0d5c003505336b5ae6416e4508bb768b /MAT/mat.py
parent67d5c1fa6b9ab6e1e7328ee57b15d8e46526d72a (diff)
Reorganize source tree and files installation location, cleanup setup.py (Closes: #689409)
Diffstat (limited to 'MAT/mat.py')
-rw-r--r--MAT/mat.py152
1 files changed, 152 insertions, 0 deletions
diff --git a/MAT/mat.py b/MAT/mat.py
new file mode 100644
index 0000000..4c12333
--- /dev/null
+++ b/MAT/mat.py
@@ -0,0 +1,152 @@
1#!/usr/bin/env python
2
3'''
4 Metadata anonymisation toolkit library
5'''
6
7import os
8import subprocess
9import logging
10import mimetypes
11import xml.sax
12
13import hachoir_core.cmd_line
14import hachoir_parser
15
16import strippers
17
18__version__ = '0.3.2'
19__author__ = 'jvoisin'
20
# Logging configuration, applied once, when the module is imported.

# Keep hachoir's own console output quiet
# (set to False to get hachoir's messages as well).
hachoir_core.config.quiet = True

# Name of the log file handed to logging.basicConfig; with an empty
# name no log file is opened (use e.g. 'report.log' to keep one).
fname = ''

# NOTE(review): this used to be assigned logging.CRITICAL ("silence")
# and then unconditionally overwritten with logging.DEBUG ("verbose"),
# so DEBUG has always been the effective level. The dead assignment is
# gone; switch to logging.CRITICAL here if a silent library is wanted.
LOGGING_LEVEL = logging.DEBUG

logging.basicConfig(filename=fname, level=LOGGING_LEVEL)
32
33
def get_sharedir(filename):
    '''
        Find a data file, wherever it has been installed.

        The name is first tried as given; if it is not an existing
        file, it is looked up in each known share directory, in order.

        filename: name (or path) of the file to look for
        return: the first matching path, or None if nothing matches
    '''
    if os.path.isfile(filename):
        return filename

    # Searched in this order; the first existing entry wins.
    search_dirs = (
        '/usr/local/share/mat/',
        '/usr/share/mat',
        '/usr/local/share/pixmaps/',
    )
    for directory in search_dirs:
        candidate = os.path.join(directory, filename)
        if os.path.exists(candidate):
            return candidate

    return None
46
47
class XMLParser(xml.sax.handler.ContentHandler):
    '''
        SAX handler for the supported-formats xml.

        Every closed <format> element adds one dict to self.list,
        mapping the name of each element seen inside it to its text.
    '''
    def __init__(self):
        # Explicit base call rather than super(): ContentHandler is an
        # old-style class on python2.
        xml.sax.handler.ContentHandler.__init__(self)
        self.dict = {}  # fields of the <format> currently being read
        self.list = []  # one dict per <format> already closed
        self.content, self.key = '', ''  # text and name of current tag
        self.between = False  # True between an opening and closing tag

    def startElement(self, name, attrs):
        '''
            Called when entering an xml tag:
            remember its name and start collecting its text
        '''
        self.between = True
        self.key = name
        self.content = ''

    def endElement(self, name):
        '''
            Called when exiting an xml tag:
            store its text, or flush the dict at the end of a <format>
        '''
        if name == 'format':  # exiting a fileformat section
            self.list.append(self.dict.copy())
            self.dict.clear()
        else:
            # The two characters backslash + 's' stand for a space in
            # the xml; '\\s' is the warning-free spelling of '\s'.
            content = self.content.replace('\\s', ' ')
            self.dict[self.key] = content
            self.between = False

    def characters(self, characters):
        '''
            Concatenate the text found between opening and closing tags
            (the parser may deliver it in several chunks)
        '''
        if self.between:
            self.content += characters
85
86
def secure_remove(filename):
    '''
        Securely remove the file.

        The file is handed to shred(1); if shred can not be run, or
        reports a failure, the file is removed with a plain os.remove.
        Failures are logged, never raised.

        filename: path of the file to remove
    '''
    removed = False
    try:
        # '--' ends the options, so that a name starting with '-' is
        # taken as a file name. subprocess.call does not raise on a
        # non-zero exit status: it has to be checked by hand.
        status = subprocess.call(['shred', '--remove', '--', filename])
        removed = (status == 0)
    except Exception:  # best effort: shred missing, invalid name, ...
        removed = False

    if removed:
        return

    logging.error('Unable to securely remove %s' % filename)
    try:
        os.remove(filename)
    except Exception:  # best effort, the failure is only reported
        logging.error('Unable to remove %s' % filename)
103
104
def create_class_file(name, backup, add2archive):
    '''
        Return a $FILETYPEStripper() instance,
        corresponding to the filetype of the given file.

        name: path of the file to process
        backup, add2archive: handed over, untouched, to the stripper
        return: the stripper, or None if the file is not a regular,
            readable and writable file, can not be parsed, or has no
            stripper registered for its mime type
    '''
    if not os.path.isfile(name):
        # check if the file exists
        logging.error('%s is not a valid file' % name)
        return None

    if not os.access(name, os.R_OK):
        # check read permissions
        logging.error('%s is is not readable' % name)
        return None

    if not os.access(name, os.W_OK):
        # check write permission
        logging.error('%s is not writtable' % name)
        return None

    try:
        filename = hachoir_core.cmd_line.unicodeFilename(name)
    except TypeError:  # get rid of "decoding Unicode is not supported"
        filename = name

    parser = hachoir_parser.createParser(filename)
    if not parser:
        logging.info('Unable to parse %s' % filename)
        return None

    mime = parser.mime_type

    if mime == 'application/zip':  # some formats are zipped stuff
        # Refine the type with the file extension. guess_type gives
        # None for an unknown or missing extension: keep the plain
        # zip type then, instead of crashing on None.startswith below.
        mime = mimetypes.guess_type(name)[0] or mime

    if mime.startswith('application/vnd.oasis.opendocument'):
        mime = 'application/opendocument'  # opendocument fileformat
    elif mime.startswith('application/vnd.openxmlformats-officedocument'):
        mime = 'application/officeopenxml'  # office openxml

    try:
        stripper_class = strippers.STRIPPERS[mime]
    except KeyError:
        logging.info('Don\'t have stripper for %s format' % mime)
        return None

    return stripper_class(filename, parser, mime, backup, add2archive)