summaryrefslogtreecommitdiff
path: root/lib/mat.py
diff options
context:
space:
mode:
Diffstat (limited to 'lib/mat.py')
-rw-r--r--lib/mat.py162
1 files changed, 0 insertions, 162 deletions
diff --git a/lib/mat.py b/lib/mat.py
deleted file mode 100644
index fd13287..0000000
--- a/lib/mat.py
+++ /dev/null
@@ -1,162 +0,0 @@
1#!/usr/bin/env python
2
3'''
4 Metadata anonymisation toolkit library
5'''
6
7import os
8import subprocess
9import logging
10import mimetypes
11import xml.sax
12
13import hachoir_core.cmd_line
14import hachoir_parser
15
16import images
17import audio
18import office
19import archive
20import misc
21
__version__ = '0.1'
__author__ = 'jvoisin'

# Verbosity passed to the logging configuration below.
LOGGING_LEVEL = logging.DEBUG

# NOTE: this configures the root logger as a side effect of importing the
# module.
logging.basicConfig(level=LOGGING_LEVEL)

# Maps a mime type to the stripper class handling it; create_class_file()
# looks the class up here.
STRIPPERS = {
    'application/x-tar': archive.TarStripper,
    'application/x-gzip': archive.GzipStripper,
    'application/x-bzip2': archive.Bzip2Stripper,
    'application/zip': archive.ZipStripper,
    'audio/mpeg': audio.MpegAudioStripper,
    'image/jpeg': images.JpegStripper,
    'image/png': images.PngStripper,
    'application/x-bittorrent': misc.TorrentStripper,
    'application/opendocument': office.OpenDocumentStripper,
    'application/officeopenxml': office.OpenXmlStripper,
}

# PDF support is optional: the pdf entries are only registered when both
# poppler and cairo can be imported.
try:
    import poppler
    import cairo
    STRIPPERS['application/x-pdf'] = office.PdfStripper
    STRIPPERS['application/pdf'] = office.PdfStripper
except ImportError:
    # Adjacent literals instead of a backslash continuation inside the
    # string, which leaked the source indentation into the message.
    print('Unable to import python-poppler and/or python-cairo: '
          'no pdf support')

# Flac and ogg/vorbis support is optional as well and needs mutagen.
try:
    import mutagen
    STRIPPERS['audio/x-flac'] = audio.FlacStripper
    STRIPPERS['audio/vorbis'] = audio.OggStripper
except ImportError:
    print('unable to import python-mutagen : limited audio format support')
58
class XMLParser(xml.sax.handler.ContentHandler):
    '''
        SAX content handler for the supported-formats xml.

        Every closed <format> element produces one dict, appended to
        self.list; the dict maps the name of each element closed since the
        previous <format> to its text content.
    '''
    def __init__(self):
        # Initialise the base handler explicitly so that the attributes it
        # defines exist even before a parser is attached.
        xml.sax.handler.ContentHandler.__init__(self)
        self.dict = {}  # key/content pairs of the format being read
        self.list = []  # one dict per <format> element already closed
        self.content, self.key = '', ''
        self.between = False  # True between an opening tag and the next closing tag

    def startElement(self, name, attrs):
        '''
            Called when entering an xml tag: remember its name and
            start collecting its text from scratch
        '''
        self.between = True
        self.key = name
        self.content = ''

    def endElement(self, name):
        '''
            Called when exiting an xml tag: either close the current
            format section or store the text gathered for the last
            opened tag
        '''
        if name == 'format':  # exiting a fileformat section
            self.list.append(self.dict.copy())
            self.dict.clear()
        else:
            # A literal backslash followed by 's' in the text stands for a
            # space; the backslash is escaped so the literal is valid.
            content = self.content.replace('\\s', ' ')
            self.dict[self.key] = content
            self.between = False

    def characters(self, characters):
        '''
            Concatenate the text found between an opening tag and the
            next closing tag
        '''
        if self.between:
            self.content += characters
96
97
def secure_remove(filename):
    '''
        Securely remove the file.

        Try `shred --remove` first; when shred is unavailable or reports a
        failure, log it and fall back to a plain os.remove(). Errors are
        logged, never raised.
    '''
    try:
        # Argument list and no shell: spaces or shell metacharacters in the
        # name can neither break the command nor inject another one. The
        # '--' keeps a name starting with '-' from being read as an option.
        status = subprocess.call(['shred', '--remove', '--', filename])
        shredded = (status == 0)
    except (OSError, ValueError, TypeError):
        # shred could not be started (e.g. not installed) or the name is
        # not usable as a command-line argument.
        shredded = False

    if shredded:
        return

    logging.error('Unable to securely remove %s', filename)
    try:
        os.remove(filename)
    except (OSError, ValueError, TypeError):
        logging.error('Unable to remove %s', filename)
115
def is_secure(filename):
    '''
        Tell whether the given name points to an existing regular file.

        Return True when it does; otherwise log an error and return
        False. This is the only check performed on the name.
    '''
    if os.path.isfile(filename):
        return True

    logging.error('%s is not a valid file' % filename)
    return False
125
126
def create_class_file(name, backup, add2archive):
    '''
        Return a $FILETYPEStripper() instance corresponding to the
        filetype of the given file.

        name is the path of the file; backup and add2archive are handed
        unchanged to the stripper constructor.

        Return None when the name is rejected by is_secure(), when no
        hachoir parser can be created for the file, or when STRIPPERS has
        no entry for the detected mime type.
    '''
    if not is_secure(name):
        return None

    try:
        filename = hachoir_core.cmd_line.unicodeFilename(name)
    except TypeError:  # get rid of "decoding Unicode is not supported"
        filename = name

    parser = hachoir_parser.createParser(filename)
    if not parser:
        logging.info('Unable to parse %s' % filename)
        return None

    mime = parser.mime_type

    if mime == 'application/zip':  # some formats are zipped stuff
        # Refine the type from the file extension. guess_type() yields None
        # for an unknown extension; keep plain zip then instead of failing
        # on None.startswith() below.
        guessed = mimetypes.guess_type(name)[0]
        if guessed is not None:
            mime = guessed

    if mime.startswith('application/vnd.oasis.opendocument'):
        mime = 'application/opendocument'  # opendocument fileformat
    elif mime.startswith('application/vnd.openxmlformats-officedocument'):
        mime = 'application/officeopenxml'  # office openxml

    stripper_class = STRIPPERS.get(mime)
    if stripper_class is None:
        logging.info('Don\'t have stripper for %s format' % mime)
        return None

    return stripper_class(filename, parser, mime, backup, add2archive)