diff options
Diffstat (limited to 'mat2.py')
| -rwxr-xr-x | mat2.py | 121 |
1 files changed, 121 insertions, 0 deletions
| @@ -0,0 +1,121 @@ | |||
| 1 | #!/usr/bin/python3 | ||
| 2 | |||
| 3 | import os | ||
| 4 | from typing import Tuple | ||
| 5 | import sys | ||
| 6 | import itertools | ||
| 7 | import mimetypes | ||
| 8 | import argparse | ||
| 9 | import multiprocessing | ||
| 10 | |||
| 11 | from libmat2 import parser_factory, unsupported_extensions | ||
| 12 | |||
# Version of the command-line tool, displayed by the `-v`/`--version` option.
__version__ = '0.1.1'
| 14 | |||
def __check_file(filename: str, mode: int = os.R_OK) -> bool:
    """Tell whether `filename` is a regular file accessible with `mode`.

    A diagnostic is printed on the standard output when a check fails.

    :param filename: path of the file to check.
    :param mode: bitwise OR of `os.R_OK`, `os.W_OK` and `os.X_OK`, as
        accepted by `os.access`. Defaults to `os.R_OK`.
    :return: True if `filename` is a regular file and `os.access` grants
        `mode` on it, False otherwise.
    """
    if not os.path.isfile(filename):
        print("[-] %s is not a regular file." % filename)
        return False
    if not os.access(filename, mode):
        # Only name the permissions that were actually requested, so that a
        # read-only check doesn't complain about the file not being writeable.
        known_flags = (
            (os.R_OK, 'readable'),
            (os.W_OK, 'writeable'),
            (os.X_OK, 'executable'),
        )
        wanted = [label for flag, label in known_flags if mode & flag]
        print("[-] %s is not %s." % (filename, ' and '.join(wanted) or 'accessible'))
        return False
    return True
| 23 | |||
| 24 | |||
def create_arg_parser():
    """Build the command-line parser of the tool.

    The parser accepts any number of positional `files`, the `--version`
    and `--list` options, and a group of mutually exclusive switches
    (`--check`, `--show`, `--lightweight`).

    :return: the configured `argparse.ArgumentParser`.
    """
    cli = argparse.ArgumentParser(description='Metadata anonymisation toolkit 2')
    cli.add_argument('files', nargs='*')
    cli.add_argument('-v', '--version', action='version',
                     version='MAT2 %s' % __version__)
    cli.add_argument('-l', '--list', action='store_true',
                     help='list all supported fileformats')

    # At most one of these switches may be given on a single command line.
    exclusive_switches = (
        ('-c', '--check', 'check if a file is free of harmful metadatas'),
        ('-s', '--show', 'list all the harmful metadata of a file without removing them'),
        ('-L', '--lightweight', 'remove SOME metadata'),
    )
    group = cli.add_mutually_exclusive_group()
    for short_flag, long_flag, description in exclusive_switches:
        group.add_argument(short_flag, long_flag, action='store_true',
                           help=description)
    return cli
| 41 | |||
| 42 | |||
def show_meta(filename: str):
    """Print the metadata found in `filename` on the standard output.

    Nothing but a diagnostic is printed when the file fails the
    `__check_file` test or when no parser is available for its format.

    :param filename: path of the file to inspect.
    """
    if not __check_file(filename):
        return

    handler, mimetype = parser_factory.get_parser(filename)
    if handler is None:
        print("[-] %s's format (%s) is not supported" % (filename, mimetype))
        return

    print("[+] Metadata for %s:" % filename)
    for name, value in handler.get_meta().items():
        # A value that can't be encoded for the output stream is replaced by
        # a placeholder instead of aborting the whole listing.
        try:
            print(" %s: %s" % (name, value))
        except UnicodeEncodeError:
            print(" %s: harmful content" % name)
| 58 | |||
def clean_meta(params: Tuple[str, bool]) -> bool:
    """Remove the metadata of a single file.

    The arguments are packed in one tuple so that the function can be mapped
    directly over an iterable of jobs, as `main` does with its process pool.

    :param params: a `(filename, lightweight)` pair; when `lightweight` is
        true, the parser's `remove_all_lightweight` method is used instead
        of `remove_all`.
    :return: False if the file isn't a readable and writeable regular file
        or if its format isn't supported, otherwise whatever the parser's
        removal method returns.
    """
    filename, lightweight = params
    if not __check_file(filename, os.R_OK|os.W_OK):
        return False

    handler, mimetype = parser_factory.get_parser(filename)
    if handler is None:
        print("[-] %s's format (%s) is not supported" % (filename, mimetype))
        return False

    cleaner = handler.remove_all_lightweight if lightweight else handler.remove_all
    return cleaner()
| 71 | |||
| 72 | |||
def show_parsers():
    """Print the supported mimetypes, along with their file extensions.

    For every mimetype declared by the parsers returned by
    `parser_factory._get_parsers()`, the extensions guessed by the
    `mimetypes` module are listed, minus those present in
    `unsupported_extensions`. Extensions are sorted so that the output is
    stable from one run to the next.
    """
    print('[+] Supported formats:')
    for parser in parser_factory._get_parsers():
        for mtype in parser.mimetypes:
            extensions = sorted({
                extension
                for extension in mimetypes.guess_all_extensions(mtype)
                if extension[1:] not in unsupported_extensions  # skip the dot
            })
            if not extensions:
                # we're not supporting a single extension in the current
                # mimetype, so there is no point in showing the mimetype at all
                continue
            print(' - %s (%s)' % (mtype, ', '.join(extensions)))
| 86 | |||
| 87 | |||
def __get_files_recursively(files):
    """Yield the path of every file designated by `files`.

    Directories are walked recursively with `os.walk`, and the path of each
    file they contain is yielded. Any other path is yielded untouched, even
    if it doesn't exist: this way, the per-file check done by the consumers
    (`show_meta` and `clean_meta`) reports it, instead of having it silently
    dropped here.

    :param files: iterable of file and/or directory paths.
    :return: a generator of file paths.
    """
    for entry in files:
        if os.path.isdir(entry):
            for dirpath, _, filenames in os.walk(entry):
                for name in filenames:
                    yield os.path.join(dirpath, name)
        else:
            yield entry
| 96 | |||
def main() -> int:
    """Parse the command line and run the requested action.

    - without any file: list the supported formats if `--list` was given,
      print the help otherwise;
    - with `--show`: print the metadata of every given file;
    - otherwise: clean every given file, in parallel, in a pool of worker
      processes.

    :return: the exit status of the program: 0 on success, -1 if the
        cleaning of at least one file returned a false value.
    """
    arg_parser = create_arg_parser()
    args = arg_parser.parse_args()

    if not args.files:
        if args.list:
            show_parsers()
        else:
            arg_parser.print_help()
        return 0

    if args.show:
        for f in __get_files_recursively(args.files):
            show_meta(f)
        return 0

    # NOTE(review): `--check` is accepted by the argument parser but isn't
    # handled here, so it currently falls through to the cleaning below.
    lightweight = (args.lightweight is True)
    jobs = list(zip(__get_files_recursively(args.files),
                    itertools.repeat(lightweight)))

    # The context manager makes sure that the worker processes are released,
    # even if one of the jobs raises.
    with multiprocessing.Pool() as pool:
        results = list(pool.imap_unordered(clean_meta, jobs))
    return 0 if all(results) else -1
| 119 | |||
# Only run the tool when the file is executed as a script, and use the value
# returned by main() as the exit status of the process.
if __name__ == '__main__':
    raise SystemExit(main())
