From 38df679a88a19db3a4a82fdb8e20a42c9a53d1a1 Mon Sep 17 00:00:00 2001 From: jvoisin Date: Mon, 22 Oct 2018 19:12:39 +0200 Subject: Optimize the handling of problematic files --- libmat2/abstract.py | 6 ++++++ libmat2/exiftool.py | 23 ++--------------------- libmat2/video.py | 10 +++------- 3 files changed, 11 insertions(+), 28 deletions(-) (limited to 'libmat2') diff --git a/libmat2/abstract.py b/libmat2/abstract.py index 414a68b..9b510f6 100644 --- a/libmat2/abstract.py +++ b/libmat2/abstract.py @@ -1,5 +1,6 @@ import abc import os +import re from typing import Set, Dict, Union assert Set # make pyflakes happy @@ -17,6 +18,11 @@ class AbstractParser(abc.ABC): """ :raises ValueError: Raised upon an invalid file """ + if re.search('^[a-z0-9./]', filename) is None: + # Some parsers are calling external binaries, + # this prevents shell command injections + filename = os.path.join('.', filename) + self.filename = filename fname, extension = os.path.splitext(filename) self.output_filename = fname + '.cleaned' + extension diff --git a/libmat2/exiftool.py b/libmat2/exiftool.py index 331ae0c..11dd36d 100644 --- a/libmat2/exiftool.py +++ b/libmat2/exiftool.py @@ -1,11 +1,7 @@ import json import os -import re -import shutil import subprocess -import tempfile - -from typing import Dict, Union, Set, Callable, Any +from typing import Dict, Union, Set from . import abstract @@ -20,23 +16,8 @@ class ExiftoolParser(abstract.AbstractParser): """ meta_whitelist = set() # type: Set[str] - def _handle_problematic_filename(self, callback: Callable[[str], Any]) -> bytes: - """ This method takes a filename with a potentially problematic name, - and safely applies a `callback` to it. - """ - if re.search('^[a-z0-9/]', self.filename) is not None: - return callback(self.filename) - - tmpdirname = tempfile.mkdtemp() - fname = os.path.join(tmpdirname, "temp_file") - shutil.copy(self.filename, fname) - out = callback(fname) - shutil.rmtree(tmpdirname) - return out - def get_meta(self) -> Dict[str, Union[str, dict]]: - fun = lambda f: subprocess.check_output([_get_exiftool_path(), '-json', f]) - out = self._handle_problematic_filename(fun) + out = subprocess.check_output([_get_exiftool_path(), '-json', self.filename]) meta = json.loads(out.decode('utf-8'))[0] for key in self.meta_whitelist: meta.pop(key, None) diff --git a/libmat2/video.py b/libmat2/video.py index 2fa65e8..fe2a1af 100644 --- a/libmat2/video.py +++ b/libmat2/video.py @@ -24,10 +24,9 @@ class AVIParser(exiftool.ExiftoolParser): 'SampleRate', 'AvgBytesPerSec', 'BitsPerSample', 'Duration', 'ImageSize', 'Megapixels'} - - def __remove_all_internal(self, filename: str): + def remove_all(self): cmd = [_get_ffmpeg_path(), - '-i', filename, # input file + '-i', self.filename, # input file '-y', # overwrite existing output file '-loglevel', 'panic', # Don't show log '-hide_banner', # hide the banner @@ -38,11 +37,8 @@ class AVIParser(exiftool.ExiftoolParser): '-flags:v', '+bitexact', # don't add any metadata '-flags:a', '+bitexact', # don't add any metadata self.output_filename] - subprocess.check_call(cmd) - - def remove_all(self) -> bool: try: - self._handle_problematic_filename(self.__remove_all_internal) + subprocess.check_call(cmd) except subprocess.CalledProcessError as e: logging.error("Something went wrong during the processing of %s: %s", self.filename, e) return False -- cgit v1.3