diff options
| author | jvoisin | 2012-02-01 23:19:42 +0100 |
|---|---|---|
| committer | jvoisin | 2012-02-01 23:19:42 +0100 |
| commit | 43ddba48316f5003e4cd6482f73546a5dfdedb0d (patch) | |
| tree | 6f508e3a41e7f117da8a7bb067be017bdba46c8f | |
| parent | 544fe9bf1782a027b3f31bf4c10a050d783e32ac (diff) | |
Oops, forgot to add "mat"
| -rwxr-xr-x | mat | 161 | ||||
| -rw-r--r-- | mat.1 | 78 |
2 files changed, 239 insertions, 0 deletions
| @@ -0,0 +1,161 @@ | |||
| 1 | #!/usr/bin/env python | ||
| 2 | ''' | ||
| 3 | Metadata anonymisation toolkit - CLI edition | ||
| 4 | ''' | ||
| 5 | |||
| 6 | import sys | ||
| 7 | import xml.sax | ||
| 8 | import optparse | ||
| 9 | import os | ||
| 10 | |||
| 11 | import hachoir_core | ||
| 12 | |||
| 13 | from lib import mat | ||
| 14 | |||
| 15 | |||
def parse():
    '''
        Build the command-line parser, parse sys.argv, and return the
        (values, arguments) pair produced by optparse.

        When neither a file argument nor --list is given, the help text
        is printed and the program exits with status 0.
    '''
    cli = optparse.OptionParser(
        usage='%prog [options] files\n'
              'The default behaviour is to clean files given in argument')

    # Boolean switches that alter how files are cleaned
    cleaning_group = optparse.OptionGroup(cli, 'Options')
    cleaning_flags = (
        ('--add2archive', '-a',
            'Add to output archive non-supported filetypes'),
        ('--backup', '-b', 'Keep a backup copy'),
        ('--force', '-f', "Don't check if files are clean before cleaning"),
        ('--strict', '-u', 'Strict cleaning mode : loss can occur'),
    )
    for long_name, short_name, description in cleaning_flags:
        cleaning_group.add_option(long_name, short_name, action='store_true',
                                  default=False, help=description)

    # Boolean switches that only report information
    info_group = optparse.OptionGroup(cli, 'Informations')
    info_flags = (
        ('--check', '-c', 'Check if a file is free of harmful metadatas'),
        ('--display', '-d',
            'List all the harmful metadata of a file without removing them'),
        ('--list', '-l', 'List all supported fileformat'),
    )
    for long_name, short_name, description in info_flags:
        info_group.add_option(long_name, short_name, action='store_true',
                              default=False, help=description)
    # --version is handled by a callback, which prints and exits on its own
    info_group.add_option('--version', '-v', action='callback',
                          callback=display_version,
                          help='Display version and exit')

    cli.add_option_group(cleaning_group)
    cli.add_option_group(info_group)

    values, arguments = cli.parse_args()
    if arguments or values.list is not False:
        return values, arguments

    # nothing to work on and no listing requested:
    # print help and exit
    cli.print_help()
    sys.exit(0)
| 51 | |||
| 52 | |||
def display_version(*_):
    '''
        Display the program's version and the Hachoir version, then exit
        with status 0.

        Any positional arguments are accepted and ignored, so the function
        can be used directly as an optparse callback.
    '''
    # Format inside the call: "print(...) % x" only works with the
    # Python 2 print statement and raises TypeError with the print function.
    print('Metadata Anonymisation Toolkit version %s' % mat.__version__)
    print('Hachoir version %s' % hachoir_core.__version__)
    sys.exit(0)
| 60 | |||
| 61 | |||
def list_meta(class_file, filename, force):
    '''
        Print all the metadata of 'filename' on stdout

        class_file: object exposing is_clean() and get_meta()
        filename: name used in the printed messages
        force: when not False, skip the is_clean() check and always
               list the metadata
    '''
    print('[+] File %s :' % filename)
    if force is False and class_file.is_clean():
        print('No harmful metadata found')
        return

    # Query the metadata once and reuse the result for the listing
    meta = class_file.get_meta()
    print('Harmful metadata found:')
    if meta is not None:
        # items() exists on both Python 2 and Python 3 mappings
        for key, value in meta.items():
            print('\t%s : %s' % (key, value))
| 75 | |||
| 76 | |||
def is_clean(class_file, filename, force):
    '''
        Report on stdout whether 'filename' is clean, as decided by
        class_file.is_clean(). The 'force' parameter is accepted but
        not used here.
    '''
    if class_file.is_clean():
        template = '[+] %s is clean'
    else:
        template = '[+] %s is not clean'
    print(template % filename)
| 85 | |||
| 86 | |||
def clean_meta(class_file, filename, force):
    '''
        Clean the file 'filename'

        class_file: object exposing is_clean() and remove_all()
        filename: name used in the printed messages
        force: when not False, skip the is_clean() check and always
               attempt the cleaning
    '''
    print('[+] Cleaning %s' % filename)
    if force is False and class_file.is_clean():
        print('%s is already clean' % filename)
        return

    if class_file.remove_all():
        print('%s cleaned !' % filename)
    else:
        # The message must be formatted with '%': passing the filename
        # as a second print argument leaves the '%s' placeholder as-is.
        print('Unable to clean %s' % filename)
| 99 | |||
def clean_meta_strict(class_file, filename, force):
    '''
        Clean the file 'filename' through class_file.remove_all_strict().

        Unless 'force' is set, a file that already reports itself as
        clean is left untouched.
    '''
    print('[+] Cleaning %s' % filename)

    already_clean = force is False and class_file.is_clean()
    if already_clean:
        print('%s is already clean' % filename)
        return

    class_file.remove_all_strict()
    print('%s cleaned' % filename)
| 110 | |||
| 111 | |||
def list_supported():
    '''
        Print every supported fileformat described in the 'FORMATS'
        file of the share directory, then exit with status 0.
    '''
    formats_path = os.path.join(mat.get_sharedir(), 'FORMATS')

    # Feed the FORMATS file through a SAX parser; the handler
    # collects the entries in its 'list' attribute
    collector = mat.XMLParser()
    sax_parser = xml.sax.make_parser()
    sax_parser.setContentHandler(collector)
    with open(formats_path, 'r') as formats_file:
        sax_parser.parse(formats_file)

    for entry in collector.list:
        partially_supported = entry['support'] == 'partial'
        print('%s (%s)' % (entry['name'], entry['extension']))
        print('\tsupport : ' + entry['support'])
        print('\tmetadata : ' + entry['metadata'])
        print('\tmethod : ' + entry['method'])
        if partially_supported:
            # only partially supported formats are expected to carry
            # a 'remaining' field
            print('\tremaining : ' + entry['remaining'])
        print('\n')

    sys.exit(0)
| 132 | |||
| 133 | |||
def main():
    '''
        Entry point: parse the command line, select the action matching
        the given options, and apply it to every file passed in argument.
    '''
    args, filenames = parse()

    # Ordered by precedence: the first option found set wins
    dispatch = (
        (args.display, list_meta),         # only print metadata
        (args.check, is_clean),            # only check if the file is clean
        (args.strict, clean_meta_strict),  # destructive anonymisation method
    )
    for enabled, handler in dispatch:
        if enabled is True:
            func = handler
            break
    else:
        if args.list is True:  # print the list of all supported formats
            list_supported()
        else:  # default action: clean the file
            func = clean_meta

    for filename in filenames:
        class_file = mat.create_class_file(filename, args.backup,
                                           args.add2archive)
        if class_file is None:
            print('Unable to process %s' % filename)
            continue
        func(class_file, filename, args.force)


if __name__ == '__main__':
    main()
| @@ -0,0 +1,78 @@ | |||
| 1 | .TH METADATA "1" "August 2011" "Metadata Anonymisation Toolkit" "User Commands" | ||
| 2 | |||
| 3 | |||
| 4 | .SH NAME | ||
| 5 | MAT \- Metadata Anonymisation Toolkit | ||
| 6 | |||
| 7 | |||
| 8 | .SH SYNOPSIS | ||
| 9 | .B mat-cli | ||
| 10 | [\fIoptions\fR] \fIfiles\fR | ||
| 11 | .TP | ||
| 12 | .B mat-gui | ||
| 13 | |||
| 14 | |||
| 15 | .SH DESCRIPTION | ||
| 16 | The \fBMetadata Anonymisation Toolkit\fR is a lib (with a CLI and a GUI) | ||
| 17 | created to anonymise file's \fBmetadata\fR. In essence, metadata answer who, | ||
| 18 | what, when, where, why, and how about every facet of the data that are being | ||
| 19 | documented. They can be a \fBrisk for privacy\fR. | ||
| 20 | |||
| 21 | |||
| 22 | .SH OPTIONS | ||
| 23 | .TP | ||
| 24 | \fB\-h\fR, \fB\-\-help\fR | ||
| 25 | show this help message and exit | ||
| 26 | .TP | ||
| 27 | \fB\-a\fR, \fB\-\-add2archive\fR | ||
| 28 | Add non\-supported filetypes to the output archive | ||
| 29 | .TP | ||
| 30 | \fB\-b\fR, \fB\-\-backup\fR | ||
| 31 | Keep a backup copy | ||
| 32 | .TP | ||
| 33 | \fB\-c\fR, \fB\-\-check\fR | ||
| 34 | Check if a file is free of harmful metadatas | ||
| 35 | .TP | ||
| 36 | \fB\-d\fR, \fB\-\-display\fR | ||
| 37 | List all the harmful meta of a file without removing them | ||
| 38 | .TP | ||
| 39 | \fB\-f\fR, \fB\-\-force\fR | ||
| 40 | Don't check if files are clean before cleaning | ||
| 41 | .TP | ||
| 42 | \fB\-l\fR, \fB\-\-list\fR | ||
| 43 | List all supported fileformat | ||
| 44 | .TP | ||
| 45 | \fB\-u\fR, \fB\-\-strict\fR | ||
| 46 | Remove harmful metadata, but data loss can occur | ||
| 47 | .TP | ||
| 48 | \fB\-v\fR, \fB\-\-version\fR | ||
| 49 | Display version and exit | ||
| 50 | |||
| 51 | |||
| 52 | .SH EXAMPLES | ||
| 53 | .TP | ||
| 54 | \fBmat-cli \-\-display\fR mydocument.pdf | ||
| 55 | Display the mydocument.pdf's harmful metadata | ||
| 56 | .TP | ||
| 57 | \fBmat-cli \-\-check *.jpg\fR | ||
| 58 | Check all the jpg images from the current folder | ||
| 59 | |||
| 60 | |||
| 61 | .SH NOTES | ||
| 62 | MAT \fBonly processes metadata\fR; it does \fBnot\fR handle file data. | ||
| 63 | Blame yourself if you are traced back because of the data of your files. | ||
| 64 | MAT is not perfect : In most of the cases, a forensic expert with a lot | ||
| 65 | of time \fBcan trace back\fR your document. | ||
| 66 | If you want absolute privacy, use plain-text. | ||
| 67 | |||
| 68 | |||
| 69 | .SH AUTHOR | ||
| 70 | Julien (\fBjvoisin\fR) Voisin <pouicpouicpouic@gmail.com>, during the GSoC 2011 | ||
| 71 | |||
| 72 | |||
| 73 | .SH BUGS | ||
| 74 | MAT does not handle watermarking/tattooing for now. | ||
| 75 | |||
| 76 | |||
| 77 | .SH "SEE ALSO" | ||
| 78 | exiftool, hachoir-metadata | ||
