diff options
| author | jvoisin | 2018-10-05 17:00:59 +0200 |
|---|---|---|
| committer | jvoisin | 2018-10-05 17:07:58 +0200 |
| commit | 0d25b18d266b1cd546294fee8ba735831f9a7fef (patch) | |
| tree | 9cac68778ece89d246232ad1c4a1db3abcd58999 | |
| parent | d0f3534efffb057e0f6a5decde70d96b6c98cab8 (diff) | |
Improve both the typing and the comments
| -rw-r--r-- | .gitlab-ci.yml | 2 | ||||
| -rw-r--r-- | libmat2/office.py | 17 | ||||
| -rwxr-xr-x | mat2 | 31 |
3 files changed, 26 insertions, 24 deletions
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index b31d088..29e3553 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml | |||
| @@ -9,7 +9,7 @@ bandit: | |||
| 9 | script: # TODO: remove B405 and B314 | 9 | script: # TODO: remove B405 and B314 |
| 10 | - apt-get -qqy update | 10 | - apt-get -qqy update |
| 11 | - apt-get -qqy install --no-install-recommends python3-bandit | 11 | - apt-get -qqy install --no-install-recommends python3-bandit |
| 12 | - bandit ./mat2 --format txt | 12 | - bandit ./mat2 --format txt --skip B101 |
| 13 | - bandit -r ./nautilus/ --format txt --skip B101 | 13 | - bandit -r ./nautilus/ --format txt --skip B101 |
| 14 | - bandit -r ./libmat2 --format txt --skip B101,B404,B603,B405,B314 | 14 | - bandit -r ./libmat2 --format txt --skip B101,B404,B603,B405,B314 |
| 15 | 15 | ||
diff --git a/libmat2/office.py b/libmat2/office.py index 54347ea..32e7b75 100644 --- a/libmat2/office.py +++ b/libmat2/office.py | |||
| @@ -2,7 +2,7 @@ import logging | |||
| 2 | import os | 2 | import os |
| 3 | import re | 3 | import re |
| 4 | import zipfile | 4 | import zipfile |
| 5 | from typing import Dict, Set, Pattern | 5 | from typing import Dict, Set, Pattern, Tuple |
| 6 | 6 | ||
| 7 | import xml.etree.ElementTree as ET # type: ignore | 7 | import xml.etree.ElementTree as ET # type: ignore |
| 8 | 8 | ||
| @@ -14,9 +14,8 @@ from .archive import ArchiveBasedAbstractParser | |||
| 14 | assert Set | 14 | assert Set |
| 15 | assert Pattern | 15 | assert Pattern |
| 16 | 16 | ||
| 17 | def _parse_xml(full_path: str): | 17 | def _parse_xml(full_path: str) -> Tuple[ET.ElementTree, Dict[str, str]]: |
| 18 | """ This function parses XML, with namespace support. """ | 18 | """ This function parses XML, with namespace support. """ |
| 19 | |||
| 20 | namespace_map = dict() | 19 | namespace_map = dict() |
| 21 | for _, (key, value) in ET.iterparse(full_path, ("start-ns", )): | 20 | for _, (key, value) in ET.iterparse(full_path, ("start-ns", )): |
| 22 | # The ns[0-9]+ namespaces are reserved for internal usage, so | 21 | # The ns[0-9]+ namespaces are reserved for internal usage, so |
| @@ -183,20 +182,20 @@ class MSOfficeParser(ArchiveBasedAbstractParser): | |||
| 183 | 182 | ||
| 184 | parent_map = {c:p for p in tree.iter() for c in p} | 183 | parent_map = {c:p for p in tree.iter() for c in p} |
| 185 | 184 | ||
| 186 | elements = list() | 185 | elements_del = list() |
| 187 | for element in tree.iterfind('.//w:del', namespace): | 186 | for element in tree.iterfind('.//w:del', namespace): |
| 188 | elements.append(element) | 187 | elements_del.append(element) |
| 189 | for element in elements: | 188 | for element in elements_del: |
| 190 | parent_map[element].remove(element) | 189 | parent_map[element].remove(element) |
| 191 | 190 | ||
| 192 | elements = list() | 191 | elements_ins = list() |
| 193 | for element in tree.iterfind('.//w:ins', namespace): | 192 | for element in tree.iterfind('.//w:ins', namespace): |
| 194 | for position, item in enumerate(tree.iter()): # pragma: no cover | 193 | for position, item in enumerate(tree.iter()): # pragma: no cover |
| 195 | if item == element: | 194 | if item == element: |
| 196 | for children in element.iterfind('./*'): | 195 | for children in element.iterfind('./*'): |
| 197 | elements.append((element, position, children)) | 196 | elements_ins.append((element, position, children)) |
| 198 | break | 197 | break |
| 199 | for (element, position, children) in elements: | 198 | for (element, position, children) in elements_ins: |
| 200 | parent_map[element].insert(position, children) | 199 | parent_map[element].insert(position, children) |
| 201 | parent_map[element].remove(element) | 200 | parent_map[element].remove(element) |
| 202 | 201 | ||
diff --git a/mat2 b/mat2 --- a/mat2 +++ b/mat2 | |||
| @@ -1,7 +1,7 @@ | |||
| 1 | #!/usr/bin/env python3 | 1 | #!/usr/bin/env python3 |
| 2 | 2 | ||
| 3 | import os | 3 | import os |
| 4 | from typing import Tuple | 4 | from typing import Tuple, Generator, List |
| 5 | import sys | 5 | import sys |
| 6 | import mimetypes | 6 | import mimetypes |
| 7 | import argparse | 7 | import argparse |
| @@ -16,6 +16,10 @@ except ValueError as e: | |||
| 16 | 16 | ||
| 17 | __version__ = '0.4.0' | 17 | __version__ = '0.4.0' |
| 18 | 18 | ||
| 19 | # Make pyflakes happy | ||
| 20 | assert Tuple | ||
| 21 | |||
| 22 | |||
| 19 | def __check_file(filename: str, mode: int=os.R_OK) -> bool: | 23 | def __check_file(filename: str, mode: int=os.R_OK) -> bool: |
| 20 | if not os.path.exists(filename): | 24 | if not os.path.exists(filename): |
| 21 | print("[-] %s is doesn't exist." % filename) | 25 | print("[-] %s is doesn't exist." % filename) |
| @@ -29,7 +33,7 @@ def __check_file(filename: str, mode: int=os.R_OK) -> bool: | |||
| 29 | return True | 33 | return True |
| 30 | 34 | ||
| 31 | 35 | ||
| 32 | def create_arg_parser(): | 36 | def create_arg_parser() -> argparse.ArgumentParser: |
| 33 | parser = argparse.ArgumentParser(description='Metadata anonymisation toolkit 2') | 37 | parser = argparse.ArgumentParser(description='Metadata anonymisation toolkit 2') |
| 34 | parser.add_argument('files', nargs='*', help='the files to process') | 38 | parser.add_argument('files', nargs='*', help='the files to process') |
| 35 | parser.add_argument('-v', '--version', action='version', | 39 | parser.add_argument('-v', '--version', action='version', |
| @@ -63,19 +67,18 @@ def show_meta(filename: str): | |||
| 63 | return | 67 | return |
| 64 | 68 | ||
| 65 | print("[+] Metadata for %s:" % filename) | 69 | print("[+] Metadata for %s:" % filename) |
| 66 | meta = p.get_meta().items() | 70 | metadata = p.get_meta().items() |
| 67 | if not meta: | 71 | if not metadata: |
| 68 | print(" No metadata found") | 72 | print(" No metadata found") |
| 69 | return | 73 | return |
| 70 | 74 | ||
| 71 | for k, v in meta: | 75 | for k, v in metadata: |
| 72 | try: # FIXME this is ugly. | 76 | try: # FIXME this is ugly. |
| 73 | print(" %s: %s" % (k, v)) | 77 | print(" %s: %s" % (k, v)) |
| 74 | except UnicodeEncodeError: | 78 | except UnicodeEncodeError: |
| 75 | print(" %s: harmful content" % k) | 79 | print(" %s: harmful content" % k) |
| 76 | 80 | ||
| 77 | def clean_meta(params: Tuple[str, bool, UnknownMemberPolicy]) -> bool: | 81 | def clean_meta(filename: str, is_lightweight: bool, policy: UnknownMemberPolicy) -> bool: |
| 78 | filename, is_lightweight, unknown_member_policy = params | ||
| 79 | if not __check_file(filename, os.R_OK|os.W_OK): | 82 | if not __check_file(filename, os.R_OK|os.W_OK): |
| 80 | return False | 83 | return False |
| 81 | 84 | ||
| @@ -83,7 +86,7 @@ def clean_meta(params: Tuple[str, bool, UnknownMemberPolicy]) -> bool: | |||
| 83 | if p is None: | 86 | if p is None: |
| 84 | print("[-] %s's format (%s) is not supported" % (filename, mtype)) | 87 | print("[-] %s's format (%s) is not supported" % (filename, mtype)) |
| 85 | return False | 88 | return False |
| 86 | p.unknown_member_policy = unknown_member_policy | 89 | p.unknown_member_policy = policy |
| 87 | if is_lightweight: | 90 | if is_lightweight: |
| 88 | return p.remove_all_lightweight() | 91 | return p.remove_all_lightweight() |
| 89 | return p.remove_all() | 92 | return p.remove_all() |
| @@ -91,7 +94,7 @@ def clean_meta(params: Tuple[str, bool, UnknownMemberPolicy]) -> bool: | |||
| 91 | 94 | ||
| 92 | def show_parsers(): | 95 | def show_parsers(): |
| 93 | print('[+] Supported formats:') | 96 | print('[+] Supported formats:') |
| 94 | formats = list() | 97 | formats = set() |
| 95 | for parser in parser_factory._get_parsers(): | 98 | for parser in parser_factory._get_parsers(): |
| 96 | for mtype in parser.mimetypes: | 99 | for mtype in parser.mimetypes: |
| 97 | extensions = set() | 100 | extensions = set() |
| @@ -102,11 +105,11 @@ def show_parsers(): | |||
| 102 | # we're not supporting a single extension in the current | 105 | # we're not supporting a single extension in the current |
| 103 | # mimetype, so there is not point in showing the mimetype at all | 106 | # mimetype, so there is not point in showing the mimetype at all |
| 104 | continue | 107 | continue |
| 105 | formats.append(' - %s (%s)' % (mtype, ', '.join(extensions))) | 108 | formats.add(' - %s (%s)' % (mtype, ', '.join(extensions))) |
| 106 | print('\n'.join(sorted(formats))) | 109 | print('\n'.join(sorted(formats))) |
| 107 | 110 | ||
| 108 | 111 | ||
| 109 | def __get_files_recursively(files): | 112 | def __get_files_recursively(files: List[str]) -> Generator[str, None, None]: |
| 110 | for f in files: | 113 | for f in files: |
| 111 | if os.path.isdir(f): | 114 | if os.path.isdir(f): |
| 112 | for path, _, _files in os.walk(f): | 115 | for path, _, _files in os.walk(f): |
| @@ -141,13 +144,13 @@ def main(): | |||
| 141 | return 0 | 144 | return 0 |
| 142 | 145 | ||
| 143 | else: | 146 | else: |
| 144 | unknown_member_policy = UnknownMemberPolicy(args.unknown_members) | 147 | policy = UnknownMemberPolicy(args.unknown_members) |
| 145 | if unknown_member_policy == UnknownMemberPolicy.KEEP: | 148 | if policy == UnknownMemberPolicy.KEEP: |
| 146 | logging.warning('Keeping unknown member files may leak metadata in the resulting file!') | 149 | logging.warning('Keeping unknown member files may leak metadata in the resulting file!') |
| 147 | 150 | ||
| 148 | no_failure = True | 151 | no_failure = True |
| 149 | for f in __get_files_recursively(args.files): | 152 | for f in __get_files_recursively(args.files): |
| 150 | if clean_meta([f, args.lightweight, unknown_member_policy]) is False: | 153 | if clean_meta(f, args.lightweight, policy) is False: |
| 151 | no_failure = False | 154 | no_failure = False |
| 152 | return 0 if no_failure is True else -1 | 155 | return 0 if no_failure is True else -1 |
| 153 | 156 | ||
