diff options
| author | jvoisin | 2018-10-22 19:12:39 +0200 |
|---|---|---|
| committer | jvoisin | 2018-10-23 13:49:58 +0200 |
| commit | 38df679a88a19db3a4a82fdb8e20a42c9a53d1a1 (patch) | |
| tree | e7eea36bae3341a97e2b9a730964cfe632fb74f5 | |
| parent | 44f267a5964ea8dbb59c26c319e43fad84afb45a (diff) | |
Optimize the handling of problematic files
| -rw-r--r-- | libmat2/abstract.py | 6 | ||||
| -rw-r--r-- | libmat2/exiftool.py | 23 | ||||
| -rw-r--r-- | libmat2/video.py | 10 | ||||
| -rw-r--r-- | tests/test_corrupted_files.py | 11 | ||||
| -rw-r--r-- | tests/test_libmat2.py | 15 |
5 files changed, 36 insertions, 29 deletions
diff --git a/libmat2/abstract.py b/libmat2/abstract.py index 414a68b..9b510f6 100644 --- a/libmat2/abstract.py +++ b/libmat2/abstract.py | |||
| @@ -1,5 +1,6 @@ | |||
| 1 | import abc | 1 | import abc |
| 2 | import os | 2 | import os |
| 3 | import re | ||
| 3 | from typing import Set, Dict, Union | 4 | from typing import Set, Dict, Union |
| 4 | 5 | ||
| 5 | assert Set # make pyflakes happy | 6 | assert Set # make pyflakes happy |
| @@ -17,6 +18,11 @@ class AbstractParser(abc.ABC): | |||
| 17 | """ | 18 | """ |
| 18 | :raises ValueError: Raised upon an invalid file | 19 | :raises ValueError: Raised upon an invalid file |
| 19 | """ | 20 | """ |
| 21 | if re.search('^[a-z0-9./]', filename) is None: | ||
| 22 | # Some parsers are calling external binaries, | ||
| 23 | # this prevents shell command injections | ||
| 24 | filename = os.path.join('.', filename) | ||
| 25 | |||
| 20 | self.filename = filename | 26 | self.filename = filename |
| 21 | fname, extension = os.path.splitext(filename) | 27 | fname, extension = os.path.splitext(filename) |
| 22 | self.output_filename = fname + '.cleaned' + extension | 28 | self.output_filename = fname + '.cleaned' + extension |
diff --git a/libmat2/exiftool.py b/libmat2/exiftool.py index 331ae0c..11dd36d 100644 --- a/libmat2/exiftool.py +++ b/libmat2/exiftool.py | |||
| @@ -1,11 +1,7 @@ | |||
| 1 | import json | 1 | import json |
| 2 | import os | 2 | import os |
| 3 | import re | ||
| 4 | import shutil | ||
| 5 | import subprocess | 3 | import subprocess |
| 6 | import tempfile | 4 | from typing import Dict, Union, Set |
| 7 | |||
| 8 | from typing import Dict, Union, Set, Callable, Any | ||
| 9 | 5 | ||
| 10 | from . import abstract | 6 | from . import abstract |
| 11 | 7 | ||
| @@ -20,23 +16,8 @@ class ExiftoolParser(abstract.AbstractParser): | |||
| 20 | """ | 16 | """ |
| 21 | meta_whitelist = set() # type: Set[str] | 17 | meta_whitelist = set() # type: Set[str] |
| 22 | 18 | ||
| 23 | def _handle_problematic_filename(self, callback: Callable[[str], Any]) -> bytes: | ||
| 24 | """ This method takes a filename with a potentially problematic name, | ||
| 25 | and safely applies a `callback` to it. | ||
| 26 | """ | ||
| 27 | if re.search('^[a-z0-9/]', self.filename) is not None: | ||
| 28 | return callback(self.filename) | ||
| 29 | |||
| 30 | tmpdirname = tempfile.mkdtemp() | ||
| 31 | fname = os.path.join(tmpdirname, "temp_file") | ||
| 32 | shutil.copy(self.filename, fname) | ||
| 33 | out = callback(fname) | ||
| 34 | shutil.rmtree(tmpdirname) | ||
| 35 | return out | ||
| 36 | |||
| 37 | def get_meta(self) -> Dict[str, Union[str, dict]]: | 19 | def get_meta(self) -> Dict[str, Union[str, dict]]: |
| 38 | fun = lambda f: subprocess.check_output([_get_exiftool_path(), '-json', f]) | 20 | out = subprocess.check_output([_get_exiftool_path(), '-json', self.filename]) |
| 39 | out = self._handle_problematic_filename(fun) | ||
| 40 | meta = json.loads(out.decode('utf-8'))[0] | 21 | meta = json.loads(out.decode('utf-8'))[0] |
| 41 | for key in self.meta_whitelist: | 22 | for key in self.meta_whitelist: |
| 42 | meta.pop(key, None) | 23 | meta.pop(key, None) |
diff --git a/libmat2/video.py b/libmat2/video.py index 2fa65e8..fe2a1af 100644 --- a/libmat2/video.py +++ b/libmat2/video.py | |||
| @@ -24,10 +24,9 @@ class AVIParser(exiftool.ExiftoolParser): | |||
| 24 | 'SampleRate', 'AvgBytesPerSec', 'BitsPerSample', | 24 | 'SampleRate', 'AvgBytesPerSec', 'BitsPerSample', |
| 25 | 'Duration', 'ImageSize', 'Megapixels'} | 25 | 'Duration', 'ImageSize', 'Megapixels'} |
| 26 | 26 | ||
| 27 | 27 | def remove_all(self): | |
| 28 | def __remove_all_internal(self, filename: str): | ||
| 29 | cmd = [_get_ffmpeg_path(), | 28 | cmd = [_get_ffmpeg_path(), |
| 30 | '-i', filename, # input file | 29 | '-i', self.filename, # input file |
| 31 | '-y', # overwrite existing output file | 30 | '-y', # overwrite existing output file |
| 32 | '-loglevel', 'panic', # Don't show log | 31 | '-loglevel', 'panic', # Don't show log |
| 33 | '-hide_banner', # hide the banner | 32 | '-hide_banner', # hide the banner |
| @@ -38,11 +37,8 @@ class AVIParser(exiftool.ExiftoolParser): | |||
| 38 | '-flags:v', '+bitexact', # don't add any metadata | 37 | '-flags:v', '+bitexact', # don't add any metadata |
| 39 | '-flags:a', '+bitexact', # don't add any metadata | 38 | '-flags:a', '+bitexact', # don't add any metadata |
| 40 | self.output_filename] | 39 | self.output_filename] |
| 41 | subprocess.check_call(cmd) | ||
| 42 | |||
| 43 | def remove_all(self) -> bool: | ||
| 44 | try: | 40 | try: |
| 45 | self._handle_problematic_filename(self.__remove_all_internal) | 41 | subprocess.check_call(cmd) |
| 46 | except subprocess.CalledProcessError as e: | 42 | except subprocess.CalledProcessError as e: |
| 47 | logging.error("Something went wrong during the processing of %s: %s", self.filename, e) | 43 | logging.error("Something went wrong during the processing of %s: %s", self.filename, e) |
| 48 | return False | 44 | return False |
diff --git a/tests/test_corrupted_files.py b/tests/test_corrupted_files.py index 490ea42..82c6c3b 100644 --- a/tests/test_corrupted_files.py +++ b/tests/test_corrupted_files.py | |||
| @@ -204,3 +204,14 @@ class TestCorruptedFiles(unittest.TestCase): | |||
| 204 | p = video.AVIParser('./tests/data/clean.avi') | 204 | p = video.AVIParser('./tests/data/clean.avi') |
| 205 | self.assertFalse(p.remove_all()) | 205 | self.assertFalse(p.remove_all()) |
| 206 | os.remove('./tests/data/clean.avi') | 206 | os.remove('./tests/data/clean.avi') |
| 207 | |||
| 208 | def test_avi_injection(self): | ||
| 209 | try: | ||
| 210 | video._get_ffmpeg_path() | ||
| 211 | except RuntimeError: | ||
| 212 | raise unittest.SkipTest | ||
| 213 | |||
| 214 | shutil.copy('./tests/data/dirty.torrent', './tests/data/--output.avi') | ||
| 215 | p = video.AVIParser('./tests/data/--output.avi') | ||
| 216 | self.assertFalse(p.remove_all()) | ||
| 217 | os.remove('./tests/data/--output.avi') | ||
diff --git a/tests/test_libmat2.py b/tests/test_libmat2.py index 241c6eb..f5fc9e8 100644 --- a/tests/test_libmat2.py +++ b/tests/test_libmat2.py | |||
| @@ -46,10 +46,23 @@ class TestParameterInjection(unittest.TestCase): | |||
| 46 | shutil.copy('./tests/data/dirty.avi', './--output') | 46 | shutil.copy('./tests/data/dirty.avi', './--output') |
| 47 | p = video.AVIParser('--output') | 47 | p = video.AVIParser('--output') |
| 48 | meta = p.get_meta() | 48 | meta = p.get_meta() |
| 49 | print(meta) | ||
| 50 | self.assertEqual(meta['Software'], 'MEncoder SVN-r33148-4.0.1') | 49 | self.assertEqual(meta['Software'], 'MEncoder SVN-r33148-4.0.1') |
| 51 | os.remove('--output') | 50 | os.remove('--output') |
| 52 | 51 | ||
| 52 | def test_ffmpeg_injection_complete_path(self): | ||
| 53 | try: | ||
| 54 | video._get_ffmpeg_path() | ||
| 55 | except RuntimeError: | ||
| 56 | raise unittest.SkipTest | ||
| 57 | |||
| 58 | shutil.copy('./tests/data/dirty.avi', './tests/data/ --output.avi') | ||
| 59 | p = video.AVIParser('./tests/data/ --output.avi') | ||
| 60 | meta = p.get_meta() | ||
| 61 | self.assertEqual(meta['Software'], 'MEncoder SVN-r33148-4.0.1') | ||
| 62 | self.assertTrue(p.remove_all()) | ||
| 63 | os.remove('./tests/data/ --output.avi') | ||
| 64 | os.remove('./tests/data/ --output.cleaned.avi') | ||
| 65 | |||
| 53 | 66 | ||
| 54 | class TestUnsupportedEmbeddedFiles(unittest.TestCase): | 67 | class TestUnsupportedEmbeddedFiles(unittest.TestCase): |
| 55 | def test_odt_with_svg(self): | 68 | def test_odt_with_svg(self): |
