summary | refs | log | tree | commit | diff
path: root/libmat2
diff options
context:
space:
mode:
author jvoisin 2018-10-18 19:19:56 +0200
committer jvoisin 2018-10-22 12:58:01 +0200
commit e70ea811c99c16f3382c08153eda573df0825536 (patch)
tree f1c05ca94b9d2ab09f74cfa6c7191bf73bf104e2 /libmat2
parent 2ae5d909c3e30c009bfc45bceba96ddd82f3e198 (diff)
Implement support for .avi files, via ffmpeg
- This commit introduces optional dependencies (namely ffmpeg): mat2 will print a warning when trying to process an .avi file if ffmpeg isn't installed.
- Since metadata is obtained via exiftool, this commit also slightly refactors our exiftool wrapper.
Diffstat (limited to 'libmat2')
-rw-r--r-- libmat2/__init__.py 18
-rw-r--r-- libmat2/abstract.py 6
-rw-r--r-- libmat2/exiftool.py 61
-rw-r--r-- libmat2/images.py 45
-rw-r--r-- libmat2/parser_factory.py 2
-rw-r--r-- libmat2/video.py 58
6 files changed, 134 insertions, 56 deletions
diff --git a/libmat2/__init__.py b/libmat2/__init__.py
index f55a14c..399a364 100644
--- a/libmat2/__init__.py
+++ b/libmat2/__init__.py
@@ -1,11 +1,12 @@
1#!/usr/bin/env python3 1#!/usr/bin/env python3
2 2
3import os
4import collections 3import collections
5import enum 4import enum
6import importlib 5import importlib
7from typing import Dict, Optional 6from typing import Dict, Optional
8 7
8from . import exiftool, video
9
9# make pyflakes happy 10# make pyflakes happy
10assert Dict 11assert Dict
11assert Optional 12assert Optional
@@ -37,24 +38,13 @@ DEPENDENCIES = {
37 'mutagen': 'Mutagen', 38 'mutagen': 'Mutagen',
38 } 39 }
39 40
40def _get_exiftool_path() -> str: # pragma: no cover
41 exiftool_path = '/usr/bin/exiftool'
42 if os.path.isfile(exiftool_path):
43 if os.access(exiftool_path, os.X_OK):
44 return exiftool_path
45
46 # ArchLinux
47 exiftool_path = '/usr/bin/vendor_perl/exiftool'
48 if os.path.isfile(exiftool_path):
49 if os.access(exiftool_path, os.X_OK):
50 return exiftool_path
51 41
52 raise ValueError
53 42
54def check_dependencies() -> dict: 43def check_dependencies() -> dict:
55 ret = collections.defaultdict(bool) # type: Dict[str, bool] 44 ret = collections.defaultdict(bool) # type: Dict[str, bool]
56 45
57 ret['Exiftool'] = True if _get_exiftool_path() else False 46 ret['Exiftool'] = True if exiftool._get_exiftool_path() else False
47 ret['Ffmpeg'] = True if video._get_ffmpeg_path() else False
58 48
59 for key, value in DEPENDENCIES.items(): 49 for key, value in DEPENDENCIES.items():
60 ret[value] = True 50 ret[value] = True
diff --git a/libmat2/abstract.py b/libmat2/abstract.py
index 0084796..414a68b 100644
--- a/libmat2/abstract.py
+++ b/libmat2/abstract.py
@@ -7,7 +7,8 @@ assert Set # make pyflakes happy
7 7
8class AbstractParser(abc.ABC): 8class AbstractParser(abc.ABC):
9 """ This is the base class of every parser. 9 """ This is the base class of every parser.
10 It might yield `ValueError` on instantiation on invalid files. 10 It might yield `ValueError` on instantiation on invalid files,
11 and `RuntimeError` when something went wrong in `remove_all`.
11 """ 12 """
12 meta_list = set() # type: Set[str] 13 meta_list = set() # type: Set[str]
13 mimetypes = set() # type: Set[str] 14 mimetypes = set() # type: Set[str]
@@ -27,4 +28,7 @@ class AbstractParser(abc.ABC):
27 28
28 @abc.abstractmethod 29 @abc.abstractmethod
29 def remove_all(self) -> bool: 30 def remove_all(self) -> bool:
31 """
32 :raises RuntimeError: Raised if the cleaning process went wrong.
33 """
30 pass # pragma: no cover 34 pass # pragma: no cover
diff --git a/libmat2/exiftool.py b/libmat2/exiftool.py
new file mode 100644
index 0000000..e17d31b
--- /dev/null
+++ b/libmat2/exiftool.py
@@ -0,0 +1,61 @@
1import json
2import os
3import re
4import shutil
5import subprocess
6import tempfile
7
8from typing import Dict, Union, Set
9
10from . import abstract
11
12# Make pyflakes happy
13assert Set
14
15
16class ExiftoolParser(abstract.AbstractParser):
17 """ Exiftool is often the easiest way to get all the metadata
18 from a import file, hence why several parsers are re-using its `get_meta`
19 method.
20 """
21 meta_whitelist = set() # type: Set[str]
22
23 @staticmethod
24 def __handle_problematic_filename(filename: str, callback) -> bytes:
25 """ This method takes a filename with a problematic name,
26 and safely applies it a `callback`."""
27 tmpdirname = tempfile.mkdtemp()
28 fname = os.path.join(tmpdirname, "temp_file")
29 shutil.copy(filename, fname)
30 out = callback(fname)
31 shutil.rmtree(tmpdirname)
32 return out
33
34 def get_meta(self) -> Dict[str, Union[str, dict]]:
35 """ There is no way to escape the leading(s) dash(es) of the current
36 self.filename to prevent parameter injections, so we need to take care
37 of this.
38 """
39 fun = lambda f: subprocess.check_output([_get_exiftool_path(), '-json', f])
40 if re.search('^[a-z0-9/]', self.filename) is None:
41 out = self.__handle_problematic_filename(self.filename, fun)
42 else:
43 out = fun(self.filename)
44 meta = json.loads(out.decode('utf-8'))[0]
45 for key in self.meta_whitelist:
46 meta.pop(key, None)
47 return meta
48
49def _get_exiftool_path() -> str: # pragma: no cover
50 exiftool_path = '/usr/bin/exiftool'
51 if os.path.isfile(exiftool_path):
52 if os.access(exiftool_path, os.X_OK):
53 return exiftool_path
54
55 # ArchLinux
56 exiftool_path = '/usr/bin/vendor_perl/exiftool'
57 if os.path.isfile(exiftool_path):
58 if os.access(exiftool_path, os.X_OK):
59 return exiftool_path
60
61 raise RuntimeError("Unable to find exiftool")
diff --git a/libmat2/images.py b/libmat2/images.py
index a29cbb7..ad80892 100644
--- a/libmat2/images.py
+++ b/libmat2/images.py
@@ -1,11 +1,6 @@
1import subprocess
2import imghdr 1import imghdr
3import json
4import os 2import os
5import shutil 3from typing import Set
6import tempfile
7import re
8from typing import Set, Dict, Union
9 4
10import cairo 5import cairo
11 6
@@ -13,44 +8,12 @@ import gi
13gi.require_version('GdkPixbuf', '2.0') 8gi.require_version('GdkPixbuf', '2.0')
14from gi.repository import GdkPixbuf 9from gi.repository import GdkPixbuf
15 10
16from . import abstract, _get_exiftool_path 11from . import exiftool
17 12
18# Make pyflakes happy 13# Make pyflakes happy
19assert Set 14assert Set
20 15
21class _ImageParser(abstract.AbstractParser): 16class PNGParser(exiftool.ExiftoolParser):
22 """ Since we use `exiftool` to get metadata from
23 all images fileformat, `get_meta` is implemented in this class,
24 and all the image-handling ones are inheriting from it."""
25 meta_whitelist = set() # type: Set[str]
26
27 @staticmethod
28 def __handle_problematic_filename(filename: str, callback) -> bytes:
29 """ This method takes a filename with a problematic name,
30 and safely applies it a `callback`."""
31 tmpdirname = tempfile.mkdtemp()
32 fname = os.path.join(tmpdirname, "temp_file")
33 shutil.copy(filename, fname)
34 out = callback(fname)
35 shutil.rmtree(tmpdirname)
36 return out
37
38 def get_meta(self) -> Dict[str, Union[str, dict]]:
39 """ There is no way to escape the leading(s) dash(es) of the current
40 self.filename to prevent parameter injections, so we need to take care
41 of this.
42 """
43 fun = lambda f: subprocess.check_output([_get_exiftool_path(), '-json', f])
44 if re.search('^[a-z0-9/]', self.filename) is None:
45 out = self.__handle_problematic_filename(self.filename, fun)
46 else:
47 out = fun(self.filename)
48 meta = json.loads(out.decode('utf-8'))[0]
49 for key in self.meta_whitelist:
50 meta.pop(key, None)
51 return meta
52
53class PNGParser(_ImageParser):
54 mimetypes = {'image/png', } 17 mimetypes = {'image/png', }
55 meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName', 18 meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName',
56 'Directory', 'FileSize', 'FileModifyDate', 19 'Directory', 'FileSize', 'FileModifyDate',
@@ -77,7 +40,7 @@ class PNGParser(_ImageParser):
77 return True 40 return True
78 41
79 42
80class GdkPixbufAbstractParser(_ImageParser): 43class GdkPixbufAbstractParser(exiftool.ExiftoolParser):
81 """ GdkPixbuf can handle a lot of surfaces, so we're rending images on it, 44 """ GdkPixbuf can handle a lot of surfaces, so we're rending images on it,
82 this has the side-effect of completely removing metadata. 45 this has the side-effect of completely removing metadata.
83 """ 46 """
diff --git a/libmat2/parser_factory.py b/libmat2/parser_factory.py
index 621640b..4a0ca0d 100644
--- a/libmat2/parser_factory.py
+++ b/libmat2/parser_factory.py
@@ -18,6 +18,8 @@ def __load_all_parsers():
18 continue 18 continue
19 elif fname.endswith('__init__.py'): 19 elif fname.endswith('__init__.py'):
20 continue 20 continue
21 elif fname.endswith('exiftool.py'):
22 continue
21 basename = os.path.basename(fname) 23 basename = os.path.basename(fname)
22 name, _ = os.path.splitext(basename) 24 name, _ = os.path.splitext(basename)
23 importlib.import_module('.' + name, package='libmat2') 25 importlib.import_module('.' + name, package='libmat2')
diff --git a/libmat2/video.py b/libmat2/video.py
new file mode 100644
index 0000000..b9f3687
--- /dev/null
+++ b/libmat2/video.py
@@ -0,0 +1,58 @@
1import os
2import subprocess
3
4from . import exiftool
5
6
7class AVIParser(exiftool.ExiftoolParser):
8 mimetypes = {'video/x-msvideo', }
9 meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName', 'Directory',
10 'FileSize', 'FileModifyDate', 'FileAccessDate',
11 'FileInodeChangeDate', 'FilePermissions', 'FileType',
12 'FileTypeExtension', 'MIMEType', 'FrameRate', 'MaxDataRate',
13 'FrameCount', 'StreamCount', 'StreamType', 'VideoCodec',
14 'VideoFrameRate', 'VideoFrameCount', 'Quality',
15 'SampleSize', 'BMPVersion', 'ImageWidth', 'ImageHeight',
16 'Planes', 'BitDepth', 'Compression', 'ImageLength',
17 'PixelsPerMeterX', 'PixelsPerMeterY', 'NumColors',
18 'NumImportantColors', 'NumColors', 'NumImportantColors',
19 'RedMask', 'GreenMask', 'BlueMask', 'AlphaMask',
20 'ColorSpace', 'AudioCodec', 'AudioCodecRate',
21 'AudioSampleCount', 'AudioSampleCount',
22 'AudioSampleRate', 'Encoding', 'NumChannels',
23 'SampleRate', 'AvgBytesPerSec', 'BitsPerSample',
24 'Duration', 'ImageSize', 'Megapixels'}
25
26 def remove_all(self) -> bool:
27 """
28 TODO: handle problematic filenames starting with `-` and `--`,
29 check exiftool.py
30 """
31 cmd = [_get_ffmpeg_path(),
32 '-i', self.filename, # input file
33 '-y', # overwrite existing output file
34 '-loglevel', 'panic', # Don't show log
35 '-hide_banner', # hide the banner
36 '-codec', 'copy', # don't decode anything, just copy (speed!)
37 '-map_metadata', '-1', # remove supperficial metadata
38 '-map_chapters', '-1', # remove chapters
39 '-fflags', '+bitexact', # don't add any metadata
40 '-flags:v', '+bitexact', # don't add any metadata
41 '-flags:a', '+bitexact', # don't add any metadata
42 self.output_filename]
43
44 try:
45 subprocess.check_call(cmd)
46 except subprocess.CalledProcessError: # pragma: no cover
47 return False
48
49 return True
50
51
52def _get_ffmpeg_path() -> str: # pragma: no cover
53 ffmpeg_path = '/usr/bin/ffmpeg'
54 if os.path.isfile(ffmpeg_path):
55 if os.access(ffmpeg_path, os.X_OK):
56 return ffmpeg_path
57
58 raise RuntimeError("Unable to find ffmpeg")