summaryrefslogtreecommitdiff
path: root/mat-cli
blob: 1058d46fc6034dbcdec348f5f64c7c061b78d215 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
#!/usr/bin/env python
'''
    Metadata anonymisation toolkit - CLI edition
'''

import sys
import xml.sax
import optparse
import os

import hachoir_core

from mat import mat


def parse():
    '''
        Build the option parser, parse the program's command line and
        return the (values, arguments) pair produced by optparse.

        When no positional argument is given and --list was not
        requested, the help is printed and the program exits with
        status 0.
    '''
    usage = ('%prog [options] files\n'
             'The default behaviour is to clean files given in argument')
    parser = optparse.OptionParser(usage=usage)

    # Boolean switches that tune how the files are processed
    options = optparse.OptionGroup(parser, 'Options')
    processing_flags = (
        ('--add2archive', '-a',
            'Add to output archive non-supported filetypes'),
        ('--backup', '-b', 'Keep a backup copy'),
        ('--force', '-f', "Don't check if files are clean before cleaning"),
        ('--strict', '-u', 'Strict cleaning mode : loss can occur'),
    )
    for long_name, short_name, description in processing_flags:
        options.add_option(long_name, short_name, action='store_true',
            default=False, help=description)

    # Boolean switches that only report information
    info = optparse.OptionGroup(parser, 'Informations')
    reporting_flags = (
        ('--check', '-c', 'Check if a file is free of harmful metadatas'),
        ('--display', '-d',
            'List all the harmful metadata of a file without removing them'),
        ('--list', '-l', 'List all supported fileformat'),
    )
    for long_name, short_name, description in reporting_flags:
        info.add_option(long_name, short_name, action='store_true',
            default=False, help=description)
    # --version is handled by a callback instead of storing a value
    info.add_option('--version', '-v', action='callback',
        callback=display_version, help='Display version and exit')

    for group in (options, info):
        parser.add_option_group(group)

    values, arguments = parser.parse_args()
    if arguments or values.list is not False:
        return values, arguments

    # neither files nor --list were given: show the help and stop here
    parser.print_help()
    sys.exit(0)


def display_version(*_):
    '''
        Display the program's version, and exit

        Used as an optparse callback: the positional arguments passed
        by the caller are accepted and ignored. Prints the versions of
        mat and hachoir_core, then exits with status 0.
    '''
    # The % formatting has to happen inside the call: applied to the
    # result of print(), it only works when print is a statement and
    # raises a TypeError (None % str) when print is a function.
    print('Metadata Anonymisation Toolkit version %s' % mat.__version__)
    print('Hachoir version %s' % hachoir_core.__version__)
    sys.exit(0)


def list_meta(class_file, filename, force):
    '''
        Print all the metadata of 'filename' on stdout

        class_file: object providing is_clean() and get_meta()
        filename: name shown in the report header
        force: when it is not False, is_clean() is not consulted and
               the result of get_meta() is always listed
    '''
    print('[+] File %s :' % filename)
    if force is False and class_file.is_clean():
        print('No harmful metadata found')
    else:
        # Fetch the metadata once and reuse it for the listing, rather
        # than calling get_meta() a second time.
        meta = class_file.get_meta()
        print('Harmful metadata found:')
        if meta is not None:
            # items() is available on dicts in both Python 2 and 3,
            # whereas iteritems() only exists in Python 2
            for key, value in meta.items():
                print('\t' + key + ' : ' + str(value))


def is_clean(class_file, filename, force):
    '''
        Report on stdout whether 'filename' is clean or not, according
        to class_file.is_clean()

        'force' is accepted so that the signature matches the other
        per-file handlers, but it is not used here.
    '''
    if class_file.is_clean():
        verdict = '[+] %s is clean'
    else:
        verdict = '[+] %s is not clean'
    print(verdict % filename)


def clean_meta(class_file, filename, force):
    '''
        Clean the file 'filename'

        class_file: object providing is_clean() and remove_all()
        filename: name used in the messages printed on stdout
        force: when it is not False, is_clean() is not consulted and
               remove_all() is always attempted

        The outcome is reported according to the truth value returned
        by remove_all().
    '''
    print('[+] Cleaning %s' % filename)
    if force is False and class_file.is_clean():
        print('%s is already clean' % filename)
    else:
        if class_file.remove_all():
            print('%s cleaned !' % filename)
        else:
            # Interpolate the filename with %: passing it as a second
            # argument to print() leaves the '%s' placeholder unfilled.
            print('Unable to clean %s' % filename)

def clean_meta_strict(class_file, filename, force):
    '''
        Clean the file 'filename' through class_file.remove_all_strict()

        Unless 'force' is set, a file that class_file.is_clean()
        reports as clean is left untouched. The return value of
        remove_all_strict() is not inspected.
    '''
    print('[+] Cleaning %s' % filename)

    # is_clean() is only consulted when force is exactly False
    nothing_to_do = force is False and class_file.is_clean()
    if nothing_to_do:
        print('%s is already clean' % filename)
        return

    class_file.remove_all_strict()
    print('%s cleaned' % filename)


def list_supported():
    '''
        Print every entry collected from the FORMATS file, then exit

        The FORMATS file is looked up in mat.get_sharedir() and fed to
        a SAX parser whose content handler is a mat.XMLParser; the
        entries are then read from the handler's 'list' attribute.
        The program exits with status 0 afterwards.
    '''
    collector = mat.XMLParser()
    sax_reader = xml.sax.make_parser()
    sax_reader.setContentHandler(collector)
    formats_path = os.path.join(mat.get_sharedir(), 'FORMATS')
    with open(formats_path, 'r') as formats_file:
        sax_reader.parse(formats_file)

    # (label, key) pairs printed for every entry, in this order
    fields = (
        ('\tsupport : ', 'support'),
        ('\tmetadata : ', 'metadata'),
        ('\tmethod : ', 'method'),
    )
    for entry in collector.list:
        print('%s (%s)' % (entry['name'], entry['extension']))
        for label, key in fields:
            print(label + entry[key])
        # only partially supported formats get a 'remaining' line
        if entry['support'] == 'partial':
            print('\tremaining : ' + entry['remaining'])
        print('\n')
    sys.exit(0)


def main():
    '''
        Entry point: parse the command line, pick the handler matching
        the given options and apply it to every file.
    '''
    args, filenames = parse()

    # (flag, handler) pairs in decreasing priority: the first flag that
    # is set decides which handler is applied to the files
    per_file_modes = (
        (args.display, list_meta),         # only print metadata
        (args.check, is_clean),            # only check if the file is clean
        (args.strict, clean_meta_strict),  # destructive anonymisation
    )
    for enabled, handler in per_file_modes:
        if enabled is True:
            action = handler
            break
    else:
        if args.list is True:
            # print the list of all supported formats
            list_supported()
        else:
            # default behaviour: clean the files
            action = clean_meta

    for filename in filenames:
        class_file = mat.create_class_file(filename, args.backup,
            args.add2archive)
        if class_file is None:
            print('Unable to process %s' % filename)
            continue
        action(class_file, filename, args.force)

# Run the command line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()