From b9a62d798af14ea799ae5fceab1ed7a537d1cbdd Mon Sep 17 00:00:00 2001 From: jvoisin Date: Sun, 3 Feb 2019 22:55:15 +0100 Subject: Refactor a bit office get_meta handling This should make it easier to get more metadata from archive-based file formats. --- libmat2/archive.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'libmat2/archive.py') diff --git a/libmat2/archive.py b/libmat2/archive.py index b2483fc..d155664 100644 --- a/libmat2/archive.py +++ b/libmat2/archive.py @@ -4,7 +4,7 @@ import tempfile import os import logging import shutil -from typing import Dict, Set, Pattern, Union +from typing import Dict, Set, Pattern, Union, Any from . import abstract, UnknownMemberPolicy, parser_factory @@ -42,6 +42,12 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser): # pylint: disable=unused-argument,no-self-use return True # pragma: no cover + def _specific_get_meta(self, full_path: str, file_path: str) -> Dict[str, Any]: + """ This method can be used to extract specific metadata + from files present in the archive.""" + # pylint: disable=unused-argument,no-self-use + return {} # pragma: no cover + @staticmethod def _clean_zipinfo(zipinfo: zipfile.ZipInfo) -> zipfile.ZipInfo: zipinfo.create_system = 3 # Linux @@ -74,6 +80,10 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser): temp_folder = tempfile.mkdtemp() for item in zin.infolist(): + local_meta = dict() # type: Dict[str, Union[str, Dict]] + for k, v in self._get_zipinfo_meta(item).items(): + local_meta[k] = v + if item.filename[-1] == '/': # pragma: no cover # `is_dir` is added in Python3.6 continue # don't keep empty folders @@ -81,11 +91,15 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser): zin.extract(member=item, path=temp_folder) full_path = os.path.join(temp_folder, item.filename) + specific_meta = self._specific_get_meta(full_path, item.filename) + for (k, v) in specific_meta.items(): + local_meta[k] = v + tmp_parser, _ = 
parser_factory.get_parser(full_path) # type: ignore - if not tmp_parser: - continue + if tmp_parser: + for k, v in tmp_parser.get_meta().items(): + local_meta[k] = v - local_meta = tmp_parser.get_meta() if local_meta: meta[item.filename] = local_meta -- cgit v1.3