summaryrefslogtreecommitdiff
path: root/libmat2/exiftool.py
blob: 2b91ac2d19bd4d796bd7b7b588dd9465e5b06799 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import functools
import json
import logging
import os
import shutil
import subprocess
from typing import Union, Set, Dict

from . import abstract
from . import bubblewrap


class ExiftoolParser(abstract.AbstractParser):
    """ Exiftool is often the easiest way to get all the metadata
    from a file, hence why several parsers are re-using its `get_meta`
    method.

    The methods below read `self.filename`, `self.output_filename` and
    `self.sandbox`; those attributes are not set in this class
    (presumably by `abstract.AbstractParser` -- verify there).
    """
    # Keys listed here are dropped from the dict returned by `get_meta`.
    meta_allowlist = set()  # type: Set[str]

    def get_meta(self) -> Dict[str, Union[str, Dict]]:
        """ Return the metadata reported by `exiftool -json` for
        `self.filename`, minus every key present in `meta_allowlist`.

        :raises ValueError: if the exiftool invocation fails with a
            `subprocess.CalledProcessError` (non-zero exit status).
        """
        cmd = [_get_exiftool_path(), '-json', self.filename]
        try:
            if self.sandbox:
                out = bubblewrap.run(cmd,
                                     input_filename=self.filename,
                                     check=True, stdout=subprocess.PIPE).stdout
            else:
                out = subprocess.run(cmd, check=True,
                                     stdout=subprocess.PIPE).stdout
        except subprocess.CalledProcessError as e:  # pragma: no cover
            # Keep the ValueError contract, but don't lose the root cause.
            raise ValueError("exiftool failed to read the metadata of %s"
                             % self.filename) from e

        # The first element of the decoded JSON array is the one used here.
        meta = json.loads(out.decode('utf-8'))[0]
        for key in self.meta_allowlist:
            meta.pop(key, None)
        return meta

    def _lightweight_cleanup(self) -> bool:
        """ Write a copy of `self.filename`, with its metadata removed by
        exiftool, to `self.output_filename`.

        :return: `True` on success; `False` if a pre-existing output file
            can't be removed, or if the exiftool invocation fails. Both
            failures are logged.
        """
        if os.path.exists(self.output_filename):
            try:  # exiftool can't force output to existing files
                os.remove(self.output_filename)
            except OSError as e:  # pragma: no cover
                # Report the file that couldn't be removed (the output one),
                # and build the message without a backslash continuation so
                # that no indentation leaks into the log line.
                logging.error("The output file %s is already existing and "
                              "can't be overwritten: %s.",
                              self.output_filename, e)
                return False

        # Note: '-All=' must be followed by a known exiftool option.
        # Also, '-CommonIFD0' is needed for .tiff files
        cmd = [_get_exiftool_path(),
               '-all=',         # remove metadata
               '-adobe=',       # remove adobe-specific metadata
               '-exif:all=',    # remove all exif metadata
               '-Time:All=',    # remove all timestamps
               '-quiet',        # don't show useless logs
               '-CommonIFD0=',  # remove IFD0 metadata
               '-o', self.output_filename,
               self.filename]
        try:
            if self.sandbox:
                bubblewrap.run(cmd, check=True,
                               input_filename=self.filename,
                               output_filename=self.output_filename)
            else:
                subprocess.run(cmd, check=True)
        except subprocess.CalledProcessError as e:  # pragma: no cover
            logging.error("Something went wrong during the processing of %s: %s", self.filename, e)
            return False
        return True

@functools.lru_cache(maxsize=None)
def _get_exiftool_path() -> str:  # pragma: no cover
    """ Return the path of the `exiftool` executable.

    The lookup first goes through `shutil.which`, then falls back on a
    hardcoded Arch Linux location. Successful results are memoized by
    `functools.lru_cache`.

    :raises RuntimeError: if neither lookup yields an executable.
    """
    # Exiftool on Arch Linux has a weird path
    arch_linux_path = '/usr/bin/vendor_perl/exiftool'

    exiftool_path = shutil.which('exiftool')
    if not exiftool_path:
        if not os.access(arch_linux_path, os.X_OK):
            raise RuntimeError("Unable to find exiftool")
        exiftool_path = arch_linux_path
    return exiftool_path