summaryrefslogtreecommitdiff
path: root/mat2.py
diff options
context:
space:
mode:
Diffstat (limited to 'mat2.py')
-rwxr-xr-xmat2.py121
1 files changed, 121 insertions, 0 deletions
diff --git a/mat2.py b/mat2.py
new file mode 100755
index 0000000..aa213ab
--- /dev/null
+++ b/mat2.py
@@ -0,0 +1,121 @@
1#!/usr/bin/python3
2
3import os
4from typing import Tuple
5import sys
6import itertools
7import mimetypes
8import argparse
9import multiprocessing
10
11from libmat2 import parser_factory, unsupported_extensions
12
# Version string of this tool; it is interpolated into the text printed by
# the `-v` / `--version` command-line option.
__version__ = '0.1.1'
14
def __check_file(filename: str, mode: int = os.R_OK) -> bool:
    """Tell whether `filename` is a regular file accessible with `mode`.

    A diagnostic line is printed on standard output when the check fails.

    :param filename: path of the file to check.
    :param mode: bitwise OR of `os.R_OK`, `os.W_OK` and `os.X_OK` describing
        the access the caller needs; defaults to read access only.
    :return: True if the file can be used as requested, False otherwise.
    """
    if not os.path.isfile(filename):
        print("[-] %s is not a regular file." % filename)
        return False

    if not os.access(filename, mode):
        # Only name the permissions that were actually requested, so that a
        # read-only check doesn't complain about the file not being writeable.
        known_flags = (
            (os.R_OK, 'readable'),
            (os.W_OK, 'writeable'),
            (os.X_OK, 'executable'),
        )
        requested = [label for flag, label in known_flags if mode & flag]
        print("[-] %s is not %s." % (filename,
                                     ' and '.join(requested) or 'accessible'))
        return False

    return True
23
24
def create_arg_parser():
    """Build and return the `argparse` parser for the command line.

    The parser accepts any number of positional `files`, the `--version`
    and `--list` options, and a group of mutually exclusive flags
    (`--check`, `--show`, `--lightweight`).
    """
    arg_parser = argparse.ArgumentParser(
        description='Metadata anonymisation toolkit 2')
    arg_parser.add_argument('files', nargs='*')
    arg_parser.add_argument('-v', '--version', action='version',
                            version='MAT2 %s' % __version__)
    arg_parser.add_argument('-l', '--list', action='store_true',
                            help='list all supported fileformats')

    # (short flag, long flag, help text) of the flags that can't be combined;
    # the order is the one in which they show up in the help output.
    exclusive_flags = (
        ('-c', '--check',
         'check if a file is free of harmful metadatas'),
        ('-s', '--show',
         'list all the harmful metadata of a file without removing them'),
        ('-L', '--lightweight',
         'remove SOME metadata'),
    )
    exclusive_group = arg_parser.add_mutually_exclusive_group()
    for short_flag, long_flag, description in exclusive_flags:
        exclusive_group.add_argument(short_flag, long_flag,
                                     action='store_true', help=description)

    return arg_parser
41
42
def show_meta(filename: str):
    """Print the metadata of `filename` on standard output.

    Nothing but a diagnostic is printed when the file fails the
    `__check_file` test or when no parser is available for its format.

    :param filename: path of the file to inspect.
    """
    if not __check_file(filename):
        return

    handler, mimetype = parser_factory.get_parser(filename)
    if handler is None:
        print("[-] %s's format (%s) is not supported" % (filename, mimetype))
        return

    print("[+] Metadata for %s:" % filename)
    metadata = handler.get_meta()
    for key, value in metadata.items():
        # FIXME this is ugly: a value that can't be encoded for the output
        # stream is replaced by a placeholder instead of aborting the listing.
        try:
            print(" %s: %s" % (key, value))
        except UnicodeEncodeError:
            print(" %s: harmful content" % key)
58
def clean_meta(params: Tuple[str, bool]) -> bool:
    """Remove the metadata of a single file.

    The arguments are packed in one tuple so that the function can be mapped
    over an iterable of jobs by a process pool.

    :param params: a `(filename, lightweight)` pair; when `lightweight` is
        true the parser's `remove_all_lightweight` method is used instead of
        `remove_all`.
    :return: False if the file is not a readable and writeable regular file
        or if its format is unsupported, otherwise whatever the parser's
        cleaning method returns.
    """
    filename, lightweight = params

    if not __check_file(filename, os.R_OK | os.W_OK):
        return False

    handler, mimetype = parser_factory.get_parser(filename)
    if handler is None:
        print("[-] %s's format (%s) is not supported" % (filename, mimetype))
        return False

    cleaner = handler.remove_all_lightweight if lightweight else handler.remove_all
    return cleaner()
71
72
def show_parsers():
    """Print every supported mimetype along with its file extensions.

    Extensions listed in `unsupported_extensions` are left out, and a
    mimetype is not shown at all when none of its extensions remain.
    """
    print('[+] Supported formats:')
    for parser in parser_factory._get_parsers():
        for mtype in parser.mimetypes:
            extensions = {
                candidate
                for candidate in mimetypes.guess_all_extensions(mtype)
                # the comparison is done without the leading dot
                if candidate[1:] not in unsupported_extensions
            }
            if extensions:
                print(' - %s (%s)' % (mtype, ', '.join(extensions)))
86
87
def __get_files_recursively(files):
    """Yield the paths to process, expanding directories recursively.

    :param files: iterable of paths given by the user.
    :return: a generator yielding, for each directory, the path of every
        file found beneath it, and every other path unchanged.
    """
    for entry in files:
        if os.path.isdir(entry):
            for dirpath, _, filenames in os.walk(entry):
                for name in filenames:
                    yield os.path.join(dirpath, name)
        else:
            # Anything that isn't a directory is passed through as-is, even
            # when it doesn't exist: feeding it to os.walk() would silently
            # yield nothing, hiding the user's mistake, whereas the consumers
            # validate each path with __check_file() and report the problem.
            yield entry
96
def main():
    """Command-line entry point.

    Without any file argument, either the supported formats (`--list`) or
    the help message is printed. With `--show`, the metadata of every file
    is printed. Otherwise every file is cleaned, in parallel, by a pool of
    worker processes.

    :return: 0 on success, -1 if at least one file couldn't be cleaned.
    """
    arg_parser = create_arg_parser()
    args = arg_parser.parse_args()

    if not args.files:
        if args.list:
            show_parsers()
        else:
            arg_parser.print_help()
        return 0

    if args.show:
        for f in __get_files_recursively(args.files):
            show_meta(f)
        return 0

    # NOTE(review): `--check` is accepted by the argument parser but nothing
    # here acts on it, so `-c` currently falls through to the cleaning path
    # below -- confirm whether that is intended.

    # The list of jobs is built up front, so that the directories are walked
    # in this process before any work is handed over to the pool.
    jobs = list(zip(__get_files_recursively(args.files),
                    itertools.repeat(args.lightweight is True)))

    # The context manager makes sure that the worker processes are released
    # once every result has been collected, even if an exception is raised.
    with multiprocessing.Pool() as pool:
        results = list(pool.imap_unordered(clean_meta, jobs))

    return 0 if all(results) else -1
119
if __name__ == '__main__':
    # Only run when executed as a script; the value returned by main() is
    # used as the exit status of the process.
    exit_status = main()
    sys.exit(exit_status)