From 43ddba48316f5003e4cd6482f73546a5dfdedb0d Mon Sep 17 00:00:00 2001
From: jvoisin
Date: Wed, 1 Feb 2012 23:19:42 +0100
Subject: Oops, forgot to add "mat"

---
 mat   | 189 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 mat.1 |  78 +++++++++++++++++++++++++++
 2 files changed, 267 insertions(+)
 create mode 100755 mat
 create mode 100644 mat.1

diff --git a/mat b/mat
new file mode 100755
index 0000000..ef83d84
--- /dev/null
+++ b/mat
@@ -0,0 +1,189 @@
+#!/usr/bin/env python
+'''
+    Metadata anonymisation toolkit - CLI edition
+'''
+
+import sys
+import xml.sax
+import optparse
+import os
+
+import hachoir_core
+
+from lib import mat
+
+
+def parse():
+    '''
+        Build the option parser and parse the command line.
+
+        When no file is given and --list is not set, print the help
+        and exit with status 0. Otherwise return the (values, arguments)
+        pair produced by optparse.
+    '''
+    parser = optparse.OptionParser(usage='%prog [options] files\n\
+The default behaviour is to clean files given in argument')
+    options = optparse.OptionGroup(parser, 'Options')
+    options.add_option('--add2archive', '-a', action='store_true',
+        default=False, help='Add to output archive non-supported filetypes')
+    options.add_option('--backup', '-b', action='store_true', default=False,
+        help='Keep a backup copy')
+    options.add_option('--force', '-f', action='store_true', default=False,
+        help='Don\'t check if files are clean before cleaning')
+    options.add_option('--strict', '-u', action='store_true', default=False,
+        help='Strict cleaning mode : loss can occur')
+
+    info = optparse.OptionGroup(parser, 'Informations')
+    info.add_option('--check', '-c', action='store_true', default=False,
+        help='Check if a file is free of harmful metadatas')
+    info.add_option('--display', '-d', action='store_true', default=False,
+        help='List all the harmful metadata of a file without removing them')
+    info.add_option('--list', '-l', action='store_true', default=False,
+        help='List all supported fileformat')
+    info.add_option('--version', '-v', action='callback',
+        callback=display_version, help='Display version and exit')
+    parser.add_option_group(options)
+    parser.add_option_group(info)
+
+    values, arguments = parser.parse_args()
+    if not arguments and values.list is False:
+        # neither files nor --list were given:
+        # print help and exit
+        parser.print_help()
+        sys.exit(0)
+    return values, arguments
+
+
+def display_version(*_):
+    '''
+        Display the versions of MAT and hachoir, and exit
+
+        Registered as the optparse callback of --version; the callback
+        arguments are ignored.
+    '''
+    # format inside the call: "print(...) % x" only works with the
+    # Python 2 print statement
+    print('Metadata Anonymisation Toolkit version %s' % mat.__version__)
+    print('Hachoir version %s' % hachoir_core.__version__)
+    sys.exit(0)
+
+
+def list_meta(class_file, filename, force):
+    '''
+        Print all the metadata of 'filename' on stdout
+
+        If 'force' is True, the is_clean() check is skipped and the
+        metadata returned by class_file.get_meta() are always listed.
+    '''
+    print('[+] File %s :' % filename)
+    if force is False and class_file.is_clean():
+        print('No harmful metadata found')
+    else:
+        meta = class_file.get_meta()
+        print('Harmful metadata found:')
+        if meta is not None:
+            # reuse 'meta' instead of extracting the metadata twice
+            for key, value in meta.items():
+                print('\t' + key + ' : ' + str(value))
+
+
+def is_clean(class_file, filename, force):
+    '''
+        Say if 'filename' is clean or not
+
+        'force' is unused; it is only there so that every action shares
+        the signature main() calls.
+    '''
+    if class_file.is_clean():
+        print('[+] %s is clean' % filename)
+    else:
+        print('[+] %s is not clean' % filename)
+
+
+def clean_meta(class_file, filename, force):
+    '''
+        Clean the file 'filename'
+
+        If 'force' is True, clean without checking is_clean() first.
+        Report a failure when class_file.remove_all() returns a false value.
+    '''
+    print('[+] Cleaning %s' % filename)
+    if force is False and class_file.is_clean():
+        print('%s is already clean' % filename)
+    elif class_file.remove_all():
+        print('%s cleaned !' % filename)
+    else:
+        # '%' and not ',': a comma would print the raw format string
+        print('Unable to clean %s' % filename)
+
+
+def clean_meta_strict(class_file, filename, force):
+    '''
+        Clean the file 'filename', strict way
+
+        If 'force' is True, clean without checking is_clean() first.
+    '''
+    print('[+] Cleaning %s' % filename)
+    if force is False and class_file.is_clean():
+        print('%s is already clean' % filename)
+    else:
+        # NOTE(review): unlike clean_meta(), the result of the removal is
+        # not checked - confirm what remove_all_strict() returns
+        class_file.remove_all_strict()
+        print('%s cleaned' % filename)
+
+
+def list_supported():
+    '''
+        Print all supported fileformat, and exit
+
+        The formats are read from the 'FORMATS' XML file located in
+        mat.get_sharedir().
+    '''
+    handler = mat.XMLParser()
+    parser = xml.sax.make_parser()
+    parser.setContentHandler(handler)
+    path = os.path.join(mat.get_sharedir(), 'FORMATS')
+    with open(path, 'r') as xmlfile:
+        parser.parse(xmlfile)
+
+    for item in handler.list:
+        print('%s (%s)' % (item['name'], item['extension']))
+        print('\tsupport : ' + item['support'])
+        print('\tmetadata : ' + item['metadata'])
+        print('\tmethod : ' + item['method'])
+        if item['support'] == 'partial':
+            print('\tremaining : ' + item['remaining'])
+        print('\n')
+    sys.exit(0)
+
+
+def main():
+    '''
+        main function : get args, and launch the appropriate function
+    '''
+    args, filenames = parse()
+
+    # func receives the function corresponding to the given options
+    if args.display is True:  # only print metadatas
+        func = list_meta
+    elif args.check is True:  # only check if the file is clean
+        func = is_clean
+    elif args.strict is True:  # destructive anonymisation method
+        func = clean_meta_strict
+    elif args.list is True:  # print the list of all supported format
+        list_supported()  # never returns: it calls sys.exit(0)
+    else:  # clean the file
+        func = clean_meta
+
+    for filename in filenames:
+        class_file = mat.create_class_file(filename, args.backup,
+            args.add2archive)
+        if class_file is not None:
+            func(class_file, filename, args.force)
+        else:
+            print('Unable to process %s' % filename)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/mat.1 b/mat.1
new file mode 100644
index 0000000..48ebf9a
--- /dev/null
+++ b/mat.1
@@ -0,0 +1,78 @@
+.TH METADATA "1" "August 2011" "Metadata Anonymisation Toolkit"
"User Commands" + + +.SH NAME +MAT \- Metadata Anonymisation Toolkit + + +.SH SYNOPSIS +.B mat-cli +[\fIoptions\fR] \fIfiles\fR +.TP +.B mat-gui + + +.SH DESCRIPTION +The \fBMetadata Anonymisation Toolkit\fR is a lib (with a CLI and a GUI) +created to anonymise files' \fBmetadata\fR. In essence, metadata answer who, +what, when, where, why, and how about every facet of the data that are being +documented. They can be a \fBrisk for privacy\fR. + + +.SH OPTIONS +.TP +\fB\-h\fR, \fB\-\-help\fR +show this help message and exit +.TP +\fB\-a\fR, \fB\-\-add2archive\fR +Add non\-supported filetypes to the output archive +.TP +\fB\-b\fR, \fB\-\-backup\fR +Keep a backup copy +.TP +\fB\-c\fR, \fB\-\-check\fR +Check if a file is free of harmful metadata +.TP +\fB\-d\fR, \fB\-\-display\fR +List all the harmful metadata of a file without removing them +.TP +\fB\-f\fR, \fB\-\-force\fR +Don't check if files are clean before cleaning +.TP +\fB\-l\fR, \fB\-\-list\fR +List all supported file formats +.TP +\fB\-u\fR, \fB\-\-strict\fR +Remove harmful metadata, but loss can occur +.TP +\fB\-v\fR, \fB\-\-version\fR +Display version and exit + + +.SH EXAMPLES +.TP +\fBmat-cli \-\-display\fR mydocument.pdf +Display mydocument.pdf's harmful metadata +.TP +\fBmat-cli \-\-check *.jpg\fR +Check all the jpg images from the current folder + + +.SH NOTES +MAT \fBonly processes metadata\fR, it does \fBnot\fR handle file data. +Blame yourself if you are traced back because of the data of your files. +MAT is not perfect: in most cases, a forensic expert with a lot +of time \fBcan trace back\fR your document. +If you want absolute privacy, use plain-text. + + +.SH AUTHOR +Julien (\fBjvoisin\fR) Voisin, during the GSoC 2011 + + +.SH BUGS +MAT does not handle watermarking/tattoo for now. + + +.SH "SEE ALSO" +exiftool, hachoir-metadata -- cgit v1.3