summaryrefslogtreecommitdiff
path: root/libmat/mat.py
blob: 2634cc3a9d350168806b1055cc11ed013f88dcda (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
#!/usr/bin/env python

""" Metadata anonymisation toolkit library
"""

import logging
import mimetypes
import os
import platform
import subprocess
import xml.sax

import libmat.exceptions

# Package metadata
__version__ = '0.5.4'
__author__ = 'jvoisin'

# Silence: only CRITICAL records pass the level filter
LOGGING_LEVEL = logging.CRITICAL
fname = ''

# Verbose: uncomment both lines below (and drop the two assignments above)
# LOGGING_LEVEL = logging.DEBUG
# fname = 'report.log'

# Configure the root logger once, at import time, from the settings above
logging.basicConfig(filename=fname, level=LOGGING_LEVEL)

import strippers  # this is loaded here because we need LOGGING_LEVEL


def get_logo():
    """ Locate the MAT logo on disk.

    The ``data`` directory under the current directory is tried first,
    then the two system-wide pixmaps directories.

    :return: path of the first logo file that exists, or None when
             none of the candidates is a file
    """
    candidates = (
        os.path.join(os.path.curdir, 'data/mat.png'),
        '/usr/share/pixmaps/mat.png',
        '/usr/local/share/pixmaps/mat.png',
    )
    for candidate in candidates:
        if os.path.isfile(candidate):
            return candidate
    return None


def get_datafile_path(filename):
    """ Find a data file by name.

    :param string filename: name of the wanted data file
    :return: path of the first match among the ``data`` directory under
             the current directory, ``/usr/local/share/mat/`` and
             ``/usr/share/mat/`` (searched in that order), or None when
             the file is in none of them
    """
    search_dirs = ('data', '/usr/local/share/mat/', '/usr/share/mat/')
    # os.path.join() drops the leading curdir for the absolute entries
    candidates = (os.path.join(os.path.curdir, directory, filename)
                  for directory in search_dirs)
    return next((found for found in candidates if os.path.isfile(found)),
                None)


def list_supported_formats():
    """ Build the list of fileformats that can be handled locally.

    The FORMATS data file is parsed, then every entry whose first
    (comma separated) mimetype has no entry in strippers.STRIPPERS
    is dropped.

    :return: list of dict, one per locally supported fileformat
    """
    format_handler = XMLParser()
    sax_parser = xml.sax.make_parser()
    sax_parser.setContentHandler(format_handler)
    with open(get_datafile_path('FORMATS'), 'r') as formats_file:
        sax_parser.parse(formats_file)

    return [entry for entry in format_handler.list
            if entry['mimetype'].split(',')[0] in strippers.STRIPPERS]


class XMLParser(xml.sax.handler.ContentHandler):
    """ Parse the supported format xml, and return a corresponding
        list of dict

        Once parsing is over, the result is available in the `list`
        attribute: one dict per <format> element, mapping the name of
        each child tag to its text content.
    """

    def __init__(self):
        xml.sax.handler.ContentHandler.__init__(self)
        self.dict = {}  # fields of the <format> currently being read
        self.list = []  # completed formats, in document order
        self.content, self.key = '', ''  # text and name of the open tag
        self.between = False  # True from a start tag to the next end tag

    def startElement(self, name, attrs):
        """ Called when entering into xml tag

        :param name: name of the opened tag
        :param attrs: attributes of the tag (unused)
        """
        self.between = True
        self.key = name
        self.content = ''

    def endElement(self, name):
        """ Called when exiting a xml tag

        :param name: name of the closed tag
        """
        if name == 'format':  # leaving a fileformat section
            self.list.append(self.dict.copy())
            self.dict.clear()
        else:
            # Raw string: the pattern is the two characters backslash + s.
            # The non-raw spelling is an invalid escape sequence and
            # triggers a warning on recent Python versions.
            content = self.content.replace(r'\s', ' ')
            self.dict[self.key] = content
            self.between = False

    def characters(self, characters):
        """ Concatenate the content between opening and closing tags

        :param characters: chunk of text reported by the parser
        """
        if self.between:
            self.content += characters


def secure_remove(filename):
    """ Securely remove $filename

    The file is first made writable, then handed to `shred --remove`.
    If shred is missing or fails, the file is removed with os.remove().

    :param str filename: File to be removed
    :return: True once the file has been removed
    :raises libmat.exceptions.UnableToWriteFile: if the permissions of
            the file can not be changed
    :raises libmat.exceptions.UnableToRemoveFile: if shred failed and
            the plain removal failed too
    """
    try:  # I want the file removed, even if it's read-only
        # Octal literal on purpose: the previous decimal 220 is 0o334,
        # which also granted execute rights and made the file readable
        # by everyone. Owner read/write is all that is needed here.
        os.chmod(filename, 0o600)
    except OSError:
        logging.error('Unable to add write rights to %s', filename)
        raise libmat.exceptions.UnableToWriteFile

    # platform.system() reports macOS as 'Darwin' (never 'MacOS'); on
    # that system the tool is expected to be available as `gshred`.
    shred = 'gshred' if platform.system() == 'Darwin' else 'shred'
    try:
        shredded = subprocess.call([shred, '--remove', filename]) == 0
    except OSError:  # the shred binary could not be executed
        shredded = False
    if shredded:
        return True
    logging.error('Unable to securely remove %s', filename)

    # Fallback: plain (non-secure) removal
    try:
        os.remove(filename)
    except OSError:
        logging.error('Unable to remove %s', filename)
        raise libmat.exceptions.UnableToRemoveFile

    return True


def create_class_file(name, backup, **kwargs):
    """ Return a $FILETYPEStripper() class,
        corresponding to the filetype of the given file

        :param str name: name of the file to be parsed
        :param bool backup: should the file be backed up?
        :param kwargs: extra keyword arguments, passed unchanged to the
                       stripper constructor
        :return: an instance of the stripper registered for the
                 mimetype of the file, or None if the file does not
                 exist, is not readable, has no guessable mimetype, or
                 has no registered stripper
    """
    if not os.path.isfile(name):  # check if the file exists
        logging.error('%s is not a valid file', name)
        return None
    if not os.access(name, os.R_OK):  # check read permissions
        logging.error('%s is not readable', name)
        return None

    mime = mimetypes.guess_type(name)[0]
    if not mime:
        logging.info('Unable to find mimetype of %s', name)
        return None

    # Each document family is registered under a single generic key
    if mime.startswith('application/vnd.oasis.opendocument'):
        mime = 'application/opendocument'  # opendocument fileformat
    elif mime.startswith('application/vnd.openxmlformats-officedocument'):
        mime = 'application/officeopenxml'  # office openxml

    is_writable = os.access(name, os.W_OK)

    try:
        stripper_class = strippers.STRIPPERS[mime]
    except KeyError:
        logging.info('Don\'t have stripper for %s format', mime)
        return None

    return stripper_class(name, mime, backup, is_writable, **kwargs)