diff options
| author | jvoisin | 2018-10-22 19:12:39 +0200 |
|---|---|---|
| committer | jvoisin | 2018-10-23 13:49:58 +0200 |
| commit | 38df679a88a19db3a4a82fdb8e20a42c9a53d1a1 (patch) | |
| tree | e7eea36bae3341a97e2b9a730964cfe632fb74f5 /libmat2 | |
| parent | 44f267a5964ea8dbb59c26c319e43fad84afb45a (diff) | |
Optimize the handling of problematic files
Diffstat (limited to 'libmat2')
| -rw-r--r-- | libmat2/abstract.py | 6 | ||||
| -rw-r--r-- | libmat2/exiftool.py | 23 | ||||
| -rw-r--r-- | libmat2/video.py | 10 |
3 files changed, 11 insertions, 28 deletions
diff --git a/libmat2/abstract.py b/libmat2/abstract.py
index 414a68b..9b510f6 100644
--- a/libmat2/abstract.py
+++ b/libmat2/abstract.py
| @@ -1,5 +1,6 @@ | |||
| 1 | import abc | 1 | import abc |
| 2 | import os | 2 | import os |
| 3 | import re | ||
| 3 | from typing import Set, Dict, Union | 4 | from typing import Set, Dict, Union |
| 4 | 5 | ||
| 5 | assert Set # make pyflakes happy | 6 | assert Set # make pyflakes happy |
| @@ -17,6 +18,11 @@ class AbstractParser(abc.ABC): | |||
| 17 | """ | 18 | """ |
| 18 | :raises ValueError: Raised upon an invalid file | 19 | :raises ValueError: Raised upon an invalid file |
| 19 | """ | 20 | """ |
| 21 | if re.search('^[a-z0-9./]', filename) is None: | ||
| 22 | # Some parsers are calling external binaries, | ||
| 23 | # this prevents shell command injections | ||
| 24 | filename = os.path.join('.', filename) | ||
| 25 | |||
| 20 | self.filename = filename | 26 | self.filename = filename |
| 21 | fname, extension = os.path.splitext(filename) | 27 | fname, extension = os.path.splitext(filename) |
| 22 | self.output_filename = fname + '.cleaned' + extension | 28 | self.output_filename = fname + '.cleaned' + extension |
diff --git a/libmat2/exiftool.py b/libmat2/exiftool.py
index 331ae0c..11dd36d 100644
--- a/libmat2/exiftool.py
+++ b/libmat2/exiftool.py
| @@ -1,11 +1,7 @@ | |||
| 1 | import json | 1 | import json |
| 2 | import os | 2 | import os |
| 3 | import re | ||
| 4 | import shutil | ||
| 5 | import subprocess | 3 | import subprocess |
| 6 | import tempfile | 4 | from typing import Dict, Union, Set |
| 7 | |||
| 8 | from typing import Dict, Union, Set, Callable, Any | ||
| 9 | 5 | ||
| 10 | from . import abstract | 6 | from . import abstract |
| 11 | 7 | ||
| @@ -20,23 +16,8 @@ class ExiftoolParser(abstract.AbstractParser): | |||
| 20 | """ | 16 | """ |
| 21 | meta_whitelist = set() # type: Set[str] | 17 | meta_whitelist = set() # type: Set[str] |
| 22 | 18 | ||
| 23 | def _handle_problematic_filename(self, callback: Callable[[str], Any]) -> bytes: | ||
| 24 | """ This method takes a filename with a potentially problematic name, | ||
| 25 | and safely applies a `callback` to it. | ||
| 26 | """ | ||
| 27 | if re.search('^[a-z0-9/]', self.filename) is not None: | ||
| 28 | return callback(self.filename) | ||
| 29 | |||
| 30 | tmpdirname = tempfile.mkdtemp() | ||
| 31 | fname = os.path.join(tmpdirname, "temp_file") | ||
| 32 | shutil.copy(self.filename, fname) | ||
| 33 | out = callback(fname) | ||
| 34 | shutil.rmtree(tmpdirname) | ||
| 35 | return out | ||
| 36 | |||
| 37 | def get_meta(self) -> Dict[str, Union[str, dict]]: | 19 | def get_meta(self) -> Dict[str, Union[str, dict]]: |
| 38 | fun = lambda f: subprocess.check_output([_get_exiftool_path(), '-json', f]) | 20 | out = subprocess.check_output([_get_exiftool_path(), '-json', self.filename]) |
| 39 | out = self._handle_problematic_filename(fun) | ||
| 40 | meta = json.loads(out.decode('utf-8'))[0] | 21 | meta = json.loads(out.decode('utf-8'))[0] |
| 41 | for key in self.meta_whitelist: | 22 | for key in self.meta_whitelist: |
| 42 | meta.pop(key, None) | 23 | meta.pop(key, None) |
diff --git a/libmat2/video.py b/libmat2/video.py
index 2fa65e8..fe2a1af 100644
--- a/libmat2/video.py
+++ b/libmat2/video.py
| @@ -24,10 +24,9 @@ class AVIParser(exiftool.ExiftoolParser): | |||
| 24 | 'SampleRate', 'AvgBytesPerSec', 'BitsPerSample', | 24 | 'SampleRate', 'AvgBytesPerSec', 'BitsPerSample', |
| 25 | 'Duration', 'ImageSize', 'Megapixels'} | 25 | 'Duration', 'ImageSize', 'Megapixels'} |
| 26 | 26 | ||
| 27 | 27 | def remove_all(self): | |
| 28 | def __remove_all_internal(self, filename: str): | ||
| 29 | cmd = [_get_ffmpeg_path(), | 28 | cmd = [_get_ffmpeg_path(), |
| 30 | '-i', filename, # input file | 29 | '-i', self.filename, # input file |
| 31 | '-y', # overwrite existing output file | 30 | '-y', # overwrite existing output file |
| 32 | '-loglevel', 'panic', # Don't show log | 31 | '-loglevel', 'panic', # Don't show log |
| 33 | '-hide_banner', # hide the banner | 32 | '-hide_banner', # hide the banner |
| @@ -38,11 +37,8 @@ class AVIParser(exiftool.ExiftoolParser): | |||
| 38 | '-flags:v', '+bitexact', # don't add any metadata | 37 | '-flags:v', '+bitexact', # don't add any metadata |
| 39 | '-flags:a', '+bitexact', # don't add any metadata | 38 | '-flags:a', '+bitexact', # don't add any metadata |
| 40 | self.output_filename] | 39 | self.output_filename] |
| 41 | subprocess.check_call(cmd) | ||
| 42 | |||
| 43 | def remove_all(self) -> bool: | ||
| 44 | try: | 40 | try: |
| 45 | self._handle_problematic_filename(self.__remove_all_internal) | 41 | subprocess.check_call(cmd) |
| 46 | except subprocess.CalledProcessError as e: | 42 | except subprocess.CalledProcessError as e: |
| 47 | logging.error("Something went wrong during the processing of %s: %s", self.filename, e) | 43 | logging.error("Something went wrong during the processing of %s: %s", self.filename, e) |
| 48 | return False | 44 | return False |
