summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorjvoisin2012-02-01 23:19:42 +0100
committerjvoisin2012-02-01 23:19:42 +0100
commit43ddba48316f5003e4cd6482f73546a5dfdedb0d (patch)
tree6f508e3a41e7f117da8a7bb067be017bdba46c8f
parent544fe9bf1782a027b3f31bf4c10a050d783e32ac (diff)
Oops, forgot to add "mat"
-rwxr-xr-xmat161
-rw-r--r--mat.178
2 files changed, 239 insertions, 0 deletions
diff --git a/mat b/mat
new file mode 100755
index 0000000..ef83d84
--- /dev/null
+++ b/mat
@@ -0,0 +1,161 @@
1#!/usr/bin/env python
2'''
3 Metadata anonymisation toolkit - CLI edition
4'''
5
6import sys
7import xml.sax
8import optparse
9import os
10
11import hachoir_core
12
13from lib import mat
14
15
def parse():
    '''
        Build the option parser, parse the command line and return
        the (values, arguments) pair produced by optparse.

        When no file is given and --list was not requested, the help
        is printed and the program exits with status 0.
    '''
    usage = ('%prog [options] files\n'
             'The default behaviour is to clean files given in argument')
    parser = optparse.OptionParser(usage=usage)

    # (long flag, short flag, help text) for every boolean switch
    cleaning_flags = (
        ('--add2archive', '-a',
            'Add to output archive non-supported filetypes'),
        ('--backup', '-b', 'Keep a backup copy'),
        ('--force', '-f',
            'Don\'t check if files are clean before cleaning'),
        ('--strict', '-u', 'Strict cleaning mode : loss can occur'),
    )
    info_flags = (
        ('--check', '-c', 'Check if a file is free of harmful metadatas'),
        ('--display', '-d',
            'List all the harmful metadata of a file without removing them'),
        ('--list', '-l', 'List all supported fileformat'),
    )

    cleaning_group = optparse.OptionGroup(parser, 'Options')
    for long_flag, short_flag, text in cleaning_flags:
        cleaning_group.add_option(long_flag, short_flag,
            action='store_true', default=False, help=text)

    info_group = optparse.OptionGroup(parser, 'Informations')
    for long_flag, short_flag, text in info_flags:
        info_group.add_option(long_flag, short_flag,
            action='store_true', default=False, help=text)
    # --version is handled by a callback, which exits by itself
    info_group.add_option('--version', '-v', action='callback',
        callback=display_version, help='Display version and exit')

    parser.add_option_group(cleaning_group)
    parser.add_option_group(info_group)

    values, arguments = parser.parse_args()
    if arguments or values.list is not False:
        return values, arguments

    # neither files nor --list were given: print help and exit
    parser.print_help()
    sys.exit(0)
51
52
def display_version(*_):
    '''
        Display the program's version, and exit

        Registered as an optparse callback for --version, hence the
        ignored positional arguments. Exits with status 0.
    '''
    # The formatting must happen inside the call: applying '%' to the
    # result of print() only works with the Python 2 print statement.
    print('Metadata Anonymisation Toolkit version %s' % mat.__version__)
    print('Hachoir version %s' % hachoir_core.__version__)
    sys.exit(0)
60
61
def list_meta(class_file, filename, force):
    '''
        Print all the metadata of 'filename' on stdout

        class_file: object providing is_clean() and get_meta()
        filename:   name used in the printed messages
        force:      when not False, skip the is_clean() check and
                    always list the metadata
    '''
    print('[+] File %s :' % filename)
    if force is False and class_file.is_clean():
        print('No harmful metadata found')
        return

    # Fetch the metadata once and iterate over that same result,
    # instead of asking the file for it a second time.
    meta = class_file.get_meta()
    print('Harmful metadata found:')
    if meta is not None:
        for key, value in meta.items():
            print('\t' + key + ' : ' + str(value))
75
76
def is_clean(class_file, filename, force):
    '''
        Report on stdout whether 'filename' is clean or not

        'force' is accepted but not used by this function.
    '''
    if class_file.is_clean():
        template = '[+] %s is clean'
    else:
        template = '[+] %s is not clean'
    print(template % filename)
85
86
def clean_meta(class_file, filename, force):
    '''
        Clean the file 'filename'

        class_file: object providing is_clean() and remove_all()
        filename:   name used in the printed messages
        force:      when not False, clean without checking first
                    whether the file is already clean
    '''
    print('[+] Cleaning %s' % filename)
    if force is False and class_file.is_clean():
        print('%s is already clean' % filename)
    elif class_file.remove_all():
        print('%s cleaned !' % filename)
    else:
        # '%' (not ',') so that the filename is actually substituted
        print('Unable to clean %s' % filename)
99
def clean_meta_strict(class_file, filename, force):
    '''
        Clean the file 'filename' with the strict method

        Unless 'force' is set, a file reported clean by
        class_file.is_clean() is left untouched.
    '''
    print('[+] Cleaning %s' % filename)
    nothing_to_do = force is False and class_file.is_clean()
    if nothing_to_do:
        print('%s is already clean' % filename)
        return
    class_file.remove_all_strict()
    print('%s cleaned' % filename)
110
111
def list_supported():
    '''
        Print every supported file format on stdout, then exit

        The formats are read from the 'FORMATS' XML file located in
        the directory returned by mat.get_sharedir().
    '''
    content_handler = mat.XMLParser()
    sax_parser = xml.sax.make_parser()
    sax_parser.setContentHandler(content_handler)
    formats_path = os.path.join(mat.get_sharedir(), 'FORMATS')
    with open(formats_path, 'r') as formats_file:
        sax_parser.parse(formats_file)

    # (printed label, key in the format entry), in display order
    fields = (
        ('\tsupport : ', 'support'),
        ('\tmetadata : ', 'metadata'),
        ('\tmethod : ', 'method'),
    )
    for entry in content_handler.list:
        print('%s (%s)' % (entry['name'], entry['extension']))
        for label, key in fields:
            print(label + entry[key])
        # 'remaining' is only shown for partially supported formats
        if entry['support'] == 'partial':
            print('\tremaining : ' + entry['remaining'])
        print('\n')
    sys.exit(0)
132
133
def main():
    '''
        main function : get args, and launch the appropriate function

        The action is chosen in this order of priority: --display,
        --check, --strict, --list, and finally plain cleaning. The
        chosen action is then applied to every file given in argument.
    '''
    args, filenames = parse()

    # func receives the function corresponding to the options given
    # as parameters
    if args.display is True:  # only print metadatas
        func = list_meta
    elif args.check is True:  # only check if the file is clean
        func = is_clean
    elif args.strict is True:  # destructive anonymisation method
        func = clean_meta_strict
    elif args.list is True:  # print the list of all supported format
        list_supported()
        # No per-file action was selected on this branch: return
        # explicitly so 'func' can never be used unbound below.
        return
    else:  # clean the file
        func = clean_meta

    for filename in filenames:
        class_file = mat.create_class_file(filename, args.backup,
            args.add2archive)
        if class_file is not None:
            func(class_file, filename, args.force)
        else:
            print('Unable to process %s' % filename)
159
# Run the command-line interface only when executed as a script
if __name__ == '__main__':
    main()
diff --git a/mat.1 b/mat.1
new file mode 100644
index 0000000..48ebf9a
--- /dev/null
+++ b/mat.1
@@ -0,0 +1,78 @@
1.TH METADATA "1" "August 2011" "Metadata Anonymisation Toolkit" "User Commands"
2
3
4.SH NAME
5MAT \- Metadata Anonymisation Toolkit
6
7
8.SH SYNOPSIS
9.B mat-cli
10[\fIoptions\fR] \fIfiles\fR
11.TP
12.B mat-gui
13
14
15.SH DESCRIPTION
16The \fBMetadata Anonymisation Toolkit\fR is a library (with a CLI and a GUI)
17created to anonymise the \fBmetadata\fR of files. In essence, metadata answer who,
18what, when, where, why, and how about every facet of the data that are being
19documented. They can be a \fBrisk for privacy\fR.
20
21
22.SH OPTIONS
23.TP
24\fB\-h\fR, \fB\-\-help\fR
25show this help message and exit
26.TP
27\fB\-a\fR, \fB\-\-add2archive\fR
28Add non\-supported filetypes to the output archive
29.TP
30\fB\-b\fR, \fB\-\-backup\fR
31Keep a backup copy
32.TP
33\fB\-c\fR, \fB\-\-check\fR
34Check if a file is free of harmful metadata
35.TP
36\fB\-d\fR, \fB\-\-display\fR
37List all the harmful metadata of a file without removing them
38.TP
39\fB\-f\fR, \fB\-\-force\fR
40Don't check if files are clean before cleaning
41.TP
42\fB\-l\fR, \fB\-\-list\fR
43List all supported file formats
44.TP
45\fB\-u\fR, \fB\-\-strict\fR
46Remove harmful metadata, but data loss can occur
47.TP
48\fB\-v\fR, \fB\-\-version\fR
49Display version and exit
50
51
52.SH EXAMPLES
53.TP
54\fBmat-cli \-\-display\fR mydocument.pdf
55Display the harmful metadata of mydocument.pdf
56.TP
57\fBmat-cli \-\-check *.jpg\fR
58Check all the jpg images from the current folder
59
60
61.SH NOTES
62MAT \fBonly processes metadata\fR, it does \fBnot\fR handle file data.
63Blame yourself if you are traced back because of the data of your files.
64MAT is not perfect: in most cases, a forensic expert with a lot
65of time \fBcan trace back\fR your document.
66If you want absolute privacy, use plain-text.
67
68
69.SH AUTHOR
70Julien (\fBjvoisin\fR) Voisin <pouicpouicpouic@gmail.com>, during the GSoC 2011
71
72
73.SH BUGS
74MAT does not handle watermarking/tattooing for now.
75
76
77.SH "SEE ALSO"
78exiftool, hachoir-metadata