diff options
| author | intrigeri | 2019-02-03 09:43:27 +0000 |
|---|---|---|
| committer | jvoisin | 2019-02-03 19:18:41 +0100 |
| commit | e8c1bb0e3c4cae579e81ce6a4b01b829900ff922 (patch) | |
| tree | cd7146283c98f25544334cdd322b8c577ccc40d3 | |
| parent | 8b5d0c286c91537b43eb3284aa93b382636e7ebf (diff) | |
Whenever possible, use bwrap for subprocesses
This should closes #90
| -rw-r--r-- | .gitlab-ci.yml | 11 | ||||
| -rw-r--r-- | INSTALL.md | 6 | ||||
| -rw-r--r-- | libmat2/__init__.py | 6 | ||||
| -rw-r--r-- | libmat2/abstract.py | 1 | ||||
| -rw-r--r-- | libmat2/archive.py | 2 | ||||
| -rw-r--r-- | libmat2/exiftool.py | 10 | ||||
| -rw-r--r-- | libmat2/office.py | 1 | ||||
| -rw-r--r-- | libmat2/parser_factory.py | 3 | ||||
| -rw-r--r-- | libmat2/subprocess.py | 100 | ||||
| -rw-r--r-- | libmat2/torrent.py | 3 | ||||
| -rw-r--r-- | libmat2/video.py | 6 |
11 files changed, 137 insertions, 12 deletions
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 10e0009..f74cc9e 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml | |||
| @@ -42,6 +42,17 @@ tests:debian: | |||
| 42 | script: | 42 | script: |
| 43 | - apt-get -qqy update | 43 | - apt-get -qqy update |
| 44 | - apt-get -qqy install --no-install-recommends python3-mutagen python3-gi-cairo gir1.2-poppler-0.18 gir1.2-gdkpixbuf-2.0 libimage-exiftool-perl python3-coverage ffmpeg | 44 | - apt-get -qqy install --no-install-recommends python3-mutagen python3-gi-cairo gir1.2-poppler-0.18 gir1.2-gdkpixbuf-2.0 libimage-exiftool-perl python3-coverage ffmpeg |
| 45 | - apt-get -qqy purge bubblewrap | ||
| 46 | - python3-coverage run --branch -m unittest discover -s tests/ | ||
| 47 | - python3-coverage report --fail-under=90 -m --include 'libmat2/*' | ||
| 48 | |||
| 49 | tests:debian_with_bubblewrap: | ||
| 50 | stage: test | ||
| 51 | tags: | ||
| 52 | - whitewhale | ||
| 53 | script: | ||
| 54 | - apt-get -qqy update | ||
| 55 | - apt-get -qqy install --no-install-recommends python3-mutagen python3-gi-cairo gir1.2-poppler-0.18 gir1.2-gdkpixbuf-2.0 libimage-exiftool-perl python3-coverage ffmpeg bubblewrap | ||
| 45 | - python3-coverage run --branch -m unittest discover -s tests/ | 56 | - python3-coverage run --branch -m unittest discover -s tests/ |
| 46 | - python3-coverage report --fail-under=100 -m --include 'libmat2/*' | 57 | - python3-coverage report --fail-under=100 -m --include 'libmat2/*' |
| 47 | 58 | ||
| @@ -9,9 +9,13 @@ installed like this: | |||
| 9 | pip3 install mat2 | 9 | pip3 install mat2 |
| 10 | ``` | 10 | ``` |
| 11 | 11 | ||
| 12 | |||
| 13 | # GNU/Linux | 12 | # GNU/Linux |
| 14 | 13 | ||
| 14 | ## Optional dependencies | ||
| 15 | |||
| 16 | When [bubblewrap](https://github.com/projectatomic/bubblewrap) is | ||
| 17 | installed, MAT2 uses it to sandbox any external processes it invokes. | ||
| 18 | |||
| 15 | ## Fedora | 19 | ## Fedora |
| 16 | 20 | ||
| 17 | Thanks to [atenart](https://ack.tf/), there is a package available on | 21 | Thanks to [atenart](https://ack.tf/), there is a package available on |
diff --git a/libmat2/__init__.py b/libmat2/__init__.py index 58cf976..be7067b 100644 --- a/libmat2/__init__.py +++ b/libmat2/__init__.py | |||
| @@ -39,12 +39,11 @@ DEPENDENCIES = { | |||
| 39 | } | 39 | } |
| 40 | 40 | ||
| 41 | 41 | ||
| 42 | |||
| 43 | def check_dependencies() -> Dict[str, bool]: | 42 | def check_dependencies() -> Dict[str, bool]: |
| 44 | ret = collections.defaultdict(bool) # type: Dict[str, bool] | 43 | ret = collections.defaultdict(bool) # type: Dict[str, bool] |
| 45 | 44 | ||
| 46 | ret['Exiftool'] = True if exiftool._get_exiftool_path() else False | 45 | ret['Exiftool'] = bool(exiftool._get_exiftool_path()) |
| 47 | ret['Ffmpeg'] = True if video._get_ffmpeg_path() else False | 46 | ret['Ffmpeg'] = bool(video._get_ffmpeg_path()) |
| 48 | 47 | ||
| 49 | for key, value in DEPENDENCIES.items(): | 48 | for key, value in DEPENDENCIES.items(): |
| 50 | ret[value] = True | 49 | ret[value] = True |
| @@ -55,6 +54,7 @@ def check_dependencies() -> Dict[str, bool]: | |||
| 55 | 54 | ||
| 56 | return ret | 55 | return ret |
| 57 | 56 | ||
| 57 | |||
| 58 | @enum.unique | 58 | @enum.unique |
| 59 | class UnknownMemberPolicy(enum.Enum): | 59 | class UnknownMemberPolicy(enum.Enum): |
| 60 | ABORT = 'abort' | 60 | ABORT = 'abort' |
diff --git a/libmat2/abstract.py b/libmat2/abstract.py index 9b510f6..aaf00d7 100644 --- a/libmat2/abstract.py +++ b/libmat2/abstract.py | |||
| @@ -37,4 +37,5 @@ class AbstractParser(abc.ABC): | |||
| 37 | """ | 37 | """ |
| 38 | :raises RuntimeError: Raised if the cleaning process went wrong. | 38 | :raises RuntimeError: Raised if the cleaning process went wrong. |
| 39 | """ | 39 | """ |
| 40 | # pylint: disable=unnecessary-pass | ||
| 40 | pass # pragma: no cover | 41 | pass # pragma: no cover |
diff --git a/libmat2/archive.py b/libmat2/archive.py index bcf8d33..b2483fc 100644 --- a/libmat2/archive.py +++ b/libmat2/archive.py | |||
| @@ -132,7 +132,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser): | |||
| 132 | logging.warning("In file %s, keeping unknown element %s (format: %s)", | 132 | logging.warning("In file %s, keeping unknown element %s (format: %s)", |
| 133 | self.filename, item.filename, mtype) | 133 | self.filename, item.filename, mtype) |
| 134 | else: | 134 | else: |
| 135 | logging.error("In file %s, element %s's format (%s) " + | 135 | logging.error("In file %s, element %s's format (%s) " \ |
| 136 | "isn't supported", | 136 | "isn't supported", |
| 137 | self.filename, item.filename, mtype) | 137 | self.filename, item.filename, mtype) |
| 138 | abort = True | 138 | abort = True |
diff --git a/libmat2/exiftool.py b/libmat2/exiftool.py index adec28e..db92f60 100644 --- a/libmat2/exiftool.py +++ b/libmat2/exiftool.py | |||
| @@ -1,10 +1,10 @@ | |||
| 1 | import json | 1 | import json |
| 2 | import logging | 2 | import logging |
| 3 | import os | 3 | import os |
| 4 | import subprocess | ||
| 5 | from typing import Dict, Union, Set | 4 | from typing import Dict, Union, Set |
| 6 | 5 | ||
| 7 | from . import abstract | 6 | from . import abstract |
| 7 | from . import subprocess | ||
| 8 | 8 | ||
| 9 | # Make pyflakes happy | 9 | # Make pyflakes happy |
| 10 | assert Set | 10 | assert Set |
| @@ -18,7 +18,9 @@ class ExiftoolParser(abstract.AbstractParser): | |||
| 18 | meta_whitelist = set() # type: Set[str] | 18 | meta_whitelist = set() # type: Set[str] |
| 19 | 19 | ||
| 20 | def get_meta(self) -> Dict[str, Union[str, dict]]: | 20 | def get_meta(self) -> Dict[str, Union[str, dict]]: |
| 21 | out = subprocess.check_output([_get_exiftool_path(), '-json', self.filename]) | 21 | out = subprocess.run([_get_exiftool_path(), '-json', self.filename], |
| 22 | input_filename=self.filename, | ||
| 23 | check=True, stdout=subprocess.PIPE).stdout | ||
| 22 | meta = json.loads(out.decode('utf-8'))[0] | 24 | meta = json.loads(out.decode('utf-8'))[0] |
| 23 | for key in self.meta_whitelist: | 25 | for key in self.meta_whitelist: |
| 24 | meta.pop(key, None) | 26 | meta.pop(key, None) |
| @@ -46,7 +48,9 @@ class ExiftoolParser(abstract.AbstractParser): | |||
| 46 | '-o', self.output_filename, | 48 | '-o', self.output_filename, |
| 47 | self.filename] | 49 | self.filename] |
| 48 | try: | 50 | try: |
| 49 | subprocess.check_call(cmd) | 51 | subprocess.run(cmd, check=True, |
| 52 | input_filename=self.filename, | ||
| 53 | output_filename=self.output_filename) | ||
| 50 | except subprocess.CalledProcessError as e: # pragma: no cover | 54 | except subprocess.CalledProcessError as e: # pragma: no cover |
| 51 | logging.error("Something went wrong during the processing of %s: %s", self.filename, e) | 55 | logging.error("Something went wrong during the processing of %s: %s", self.filename, e) |
| 52 | return False | 56 | return False |
diff --git a/libmat2/office.py b/libmat2/office.py index e6370e7..365c230 100644 --- a/libmat2/office.py +++ b/libmat2/office.py | |||
| @@ -266,7 +266,6 @@ class MSOfficeParser(ArchiveBasedAbstractParser): | |||
| 266 | f.write(b'<cp:coreProperties xmlns:cp="http://schemas.openxmlformats.org/package/2006/metadata/core-properties">') | 266 | f.write(b'<cp:coreProperties xmlns:cp="http://schemas.openxmlformats.org/package/2006/metadata/core-properties">') |
| 267 | f.write(b'</cp:coreProperties>') | 267 | f.write(b'</cp:coreProperties>') |
| 268 | 268 | ||
| 269 | |||
| 270 | if self.__remove_rsid(full_path) is False: | 269 | if self.__remove_rsid(full_path) is False: |
| 271 | return False | 270 | return False |
| 272 | 271 | ||
diff --git a/libmat2/parser_factory.py b/libmat2/parser_factory.py index 4a0ca0d..30c3b52 100644 --- a/libmat2/parser_factory.py +++ b/libmat2/parser_factory.py | |||
| @@ -10,6 +10,7 @@ assert Tuple # make pyflakes happy | |||
| 10 | 10 | ||
| 11 | T = TypeVar('T', bound='abstract.AbstractParser') | 11 | T = TypeVar('T', bound='abstract.AbstractParser') |
| 12 | 12 | ||
| 13 | |||
| 13 | def __load_all_parsers(): | 14 | def __load_all_parsers(): |
| 14 | """ Loads every parser in a dynamic way """ | 15 | """ Loads every parser in a dynamic way """ |
| 15 | current_dir = os.path.dirname(__file__) | 16 | current_dir = os.path.dirname(__file__) |
| @@ -24,8 +25,10 @@ def __load_all_parsers(): | |||
| 24 | name, _ = os.path.splitext(basename) | 25 | name, _ = os.path.splitext(basename) |
| 25 | importlib.import_module('.' + name, package='libmat2') | 26 | importlib.import_module('.' + name, package='libmat2') |
| 26 | 27 | ||
| 28 | |||
| 27 | __load_all_parsers() | 29 | __load_all_parsers() |
| 28 | 30 | ||
| 31 | |||
| 29 | def _get_parsers() -> List[T]: | 32 | def _get_parsers() -> List[T]: |
| 30 | """ Get all our parsers!""" | 33 | """ Get all our parsers!""" |
| 31 | def __get_parsers(cls): | 34 | def __get_parsers(cls): |
diff --git a/libmat2/subprocess.py b/libmat2/subprocess.py new file mode 100644 index 0000000..25646f8 --- /dev/null +++ b/libmat2/subprocess.py | |||
| @@ -0,0 +1,100 @@ | |||
| 1 | """ | ||
| 2 | Wrapper around a subset of the subprocess module, | ||
| 3 | that uses bwrap (bubblewrap) when it is available. | ||
| 4 | |||
| 5 | Instead of importing subprocess, other modules should use this as follows: | ||
| 6 | |||
| 7 | from . import subprocess | ||
| 8 | """ | ||
| 9 | |||
| 10 | import os | ||
| 11 | import shutil | ||
| 12 | import subprocess | ||
| 13 | import tempfile | ||
| 14 | from typing import List, Optional | ||
| 15 | |||
| 16 | |||
| 17 | __all__ = ['PIPE', 'run', 'CalledProcessError'] | ||
| 18 | PIPE = subprocess.PIPE | ||
| 19 | CalledProcessError = subprocess.CalledProcessError | ||
| 20 | |||
| 21 | |||
| 22 | def _get_bwrap_path() -> str: | ||
| 23 | bwrap_path = '/usr/bin/bwrap' | ||
| 24 | if os.path.isfile(bwrap_path): | ||
| 25 | if os.access(bwrap_path, os.X_OK): | ||
| 26 | return bwrap_path | ||
| 27 | |||
| 28 | raise RuntimeError("Unable to find bwrap") # pragma: no cover | ||
| 29 | |||
| 30 | |||
| 31 | # pylint: disable=bad-whitespace | ||
| 32 | def _get_bwrap_args(tempdir: str, | ||
| 33 | input_filename: str, | ||
| 34 | output_filename: Optional[str] = None) -> List[str]: | ||
| 35 | cwd = os.getcwd() | ||
| 36 | |||
| 37 | # XXX: use --ro-bind-try once all supported platforms | ||
| 38 | # have a bubblewrap recent enough to support it. | ||
| 39 | ro_bind_dirs = ['/usr', '/lib', '/lib64', '/bin', '/sbin', cwd] | ||
| 40 | ro_bind_args = [] | ||
| 41 | for bind_dir in ro_bind_dirs: | ||
| 42 | if os.path.isdir(bind_dir): # pragma: no cover | ||
| 43 | ro_bind_args.extend(['--ro-bind', bind_dir, bind_dir]) | ||
| 44 | |||
| 45 | args = ro_bind_args + \ | ||
| 46 | ['--dev', '/dev', | ||
| 47 | '--chdir', cwd, | ||
| 48 | '--unshare-all', | ||
| 49 | '--new-session', | ||
| 50 | # XXX: enable --die-with-parent once all supported platforms have | ||
| 51 | # a bubblewrap recent enough to support it. | ||
| 52 | # '--die-with-parent', | ||
| 53 | ] | ||
| 54 | |||
| 55 | if output_filename: | ||
| 56 | # Mount an empty temporary directory where the sandboxed | ||
| 57 | # process will create its output file | ||
| 58 | output_dirname = os.path.dirname(os.path.abspath(output_filename)) | ||
| 59 | args.extend(['--bind', tempdir, output_dirname]) | ||
| 60 | |||
| 61 | absolute_input_filename = os.path.abspath(input_filename) | ||
| 62 | args.extend(['--ro-bind', absolute_input_filename, absolute_input_filename]) | ||
| 63 | |||
| 64 | return args | ||
| 65 | |||
| 66 | |||
| 67 | # pylint: disable=bad-whitespace | ||
| 68 | def run(args: List[str], | ||
| 69 | input_filename: str, | ||
| 70 | output_filename: Optional[str] = None, | ||
| 71 | **kwargs) -> subprocess.CompletedProcess: | ||
| 72 | """Wrapper around `subprocess.run`, that uses bwrap (bubblewrap) if it | ||
| 73 | is available. | ||
| 74 | |||
| 75 | Extra supported keyword arguments: | ||
| 76 | |||
| 77 | - `input_filename`, made available read-only in the sandbox | ||
| 78 | - `output_filename`, where the file created by the sandboxed process | ||
| 79 | is copied upon successful completion; an empty temporary directory | ||
| 80 | is made visible as the parent directory of this file in the sandbox. | ||
| 81 | Optional: one valid use case is to invoke an external process | ||
| 82 | to inspect metadata present in a file. | ||
| 83 | """ | ||
| 84 | try: | ||
| 85 | bwrap_path = _get_bwrap_path() | ||
| 86 | except RuntimeError: # pragma: no cover | ||
| 87 | # bubblewrap is not installed ⇒ short-circuit | ||
| 88 | return subprocess.run(args, **kwargs) | ||
| 89 | |||
| 90 | with tempfile.TemporaryDirectory() as tempdir: | ||
| 91 | prefix_args = [bwrap_path] + \ | ||
| 92 | _get_bwrap_args(input_filename=input_filename, | ||
| 93 | output_filename=output_filename, | ||
| 94 | tempdir=tempdir) | ||
| 95 | completed_process = subprocess.run(prefix_args + args, **kwargs) | ||
| 96 | if output_filename and completed_process.returncode == 0: | ||
| 97 | shutil.copy(os.path.join(tempdir, os.path.basename(output_filename)), | ||
| 98 | output_filename) | ||
| 99 | |||
| 100 | return completed_process | ||
diff --git a/libmat2/torrent.py b/libmat2/torrent.py index 4d6c1e0..c006f9c 100644 --- a/libmat2/torrent.py +++ b/libmat2/torrent.py | |||
| @@ -3,6 +3,7 @@ from typing import Union, Tuple, Dict | |||
| 3 | 3 | ||
| 4 | from . import abstract | 4 | from . import abstract |
| 5 | 5 | ||
| 6 | |||
| 6 | class TorrentParser(abstract.AbstractParser): | 7 | class TorrentParser(abstract.AbstractParser): |
| 7 | mimetypes = {'application/x-bittorrent', } | 8 | mimetypes = {'application/x-bittorrent', } |
| 8 | whitelist = {b'announce', b'announce-list', b'info'} | 9 | whitelist = {b'announce', b'announce-list', b'info'} |
| @@ -32,7 +33,7 @@ class TorrentParser(abstract.AbstractParser): | |||
| 32 | return True | 33 | return True |
| 33 | 34 | ||
| 34 | 35 | ||
| 35 | class _BencodeHandler(object): | 36 | class _BencodeHandler(): |
| 36 | """ | 37 | """ |
| 37 | Since bencode isn't that hard to parse, | 38 | Since bencode isn't that hard to parse, |
| 38 | MAT2 comes with its own parser, based on the spec | 39 | MAT2 comes with its own parser, based on the spec |
diff --git a/libmat2/video.py b/libmat2/video.py index 825df92..9dc13e1 100644 --- a/libmat2/video.py +++ b/libmat2/video.py | |||
| @@ -1,10 +1,10 @@ | |||
| 1 | import os | 1 | import os |
| 2 | import subprocess | ||
| 3 | import logging | 2 | import logging |
| 4 | 3 | ||
| 5 | from typing import Dict, Union | 4 | from typing import Dict, Union |
| 6 | 5 | ||
| 7 | from . import exiftool | 6 | from . import exiftool |
| 7 | from . import subprocess | ||
| 8 | 8 | ||
| 9 | 9 | ||
| 10 | class AbstractFFmpegParser(exiftool.ExiftoolParser): | 10 | class AbstractFFmpegParser(exiftool.ExiftoolParser): |
| @@ -32,7 +32,9 @@ class AbstractFFmpegParser(exiftool.ExiftoolParser): | |||
| 32 | '-flags:a', '+bitexact', # don't add any metadata | 32 | '-flags:a', '+bitexact', # don't add any metadata |
| 33 | self.output_filename] | 33 | self.output_filename] |
| 34 | try: | 34 | try: |
| 35 | subprocess.check_call(cmd) | 35 | subprocess.run(cmd, check=True, |
| 36 | input_filename=self.filename, | ||
| 37 | output_filename=self.output_filename) | ||
| 36 | except subprocess.CalledProcessError as e: | 38 | except subprocess.CalledProcessError as e: |
| 37 | logging.error("Something went wrong during the processing of %s: %s", self.filename, e) | 39 | logging.error("Something went wrong during the processing of %s: %s", self.filename, e) |
| 38 | return False | 40 | return False |
