summaryrefslogtreecommitdiff
path: root/libmat2/archive.py
diff options
context:
space:
mode:
authorjvoisin2019-02-03 22:55:15 +0100
committerjvoisin2019-02-04 00:31:26 +0100
commitb9a62d798af14ea799ae5fceab1ed7a537d1cbdd (patch)
treea50622baf990acface31398adaef395bb398ed5d /libmat2/archive.py
parent54e50450ad9f8657ed7c60d5c0f9ab5c648d08ee (diff)
Refactor office get_meta handling a bit
This should make it easier to get more metadata from archive-based file formats.
Diffstat (limited to 'libmat2/archive.py')
-rw-r--r--libmat2/archive.py22
1 files changed, 18 insertions, 4 deletions
diff --git a/libmat2/archive.py b/libmat2/archive.py
index b2483fc..d155664 100644
--- a/libmat2/archive.py
+++ b/libmat2/archive.py
@@ -4,7 +4,7 @@ import tempfile
4import os 4import os
5import logging 5import logging
6import shutil 6import shutil
7from typing import Dict, Set, Pattern, Union 7from typing import Dict, Set, Pattern, Union, Any
8 8
9from . import abstract, UnknownMemberPolicy, parser_factory 9from . import abstract, UnknownMemberPolicy, parser_factory
10 10
@@ -42,6 +42,12 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
42 # pylint: disable=unused-argument,no-self-use 42 # pylint: disable=unused-argument,no-self-use
43 return True # pragma: no cover 43 return True # pragma: no cover
44 44
45 def _specific_get_meta(self, full_path: str, file_path: str) -> Dict[str, Any]:
46 """ This method can be used to extract specific metadata
47 from files present in the archive."""
48 # pylint: disable=unused-argument,no-self-use
49 return {} # pragma: no cover
50
45 @staticmethod 51 @staticmethod
46 def _clean_zipinfo(zipinfo: zipfile.ZipInfo) -> zipfile.ZipInfo: 52 def _clean_zipinfo(zipinfo: zipfile.ZipInfo) -> zipfile.ZipInfo:
47 zipinfo.create_system = 3 # Linux 53 zipinfo.create_system = 3 # Linux
@@ -74,6 +80,10 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
74 temp_folder = tempfile.mkdtemp() 80 temp_folder = tempfile.mkdtemp()
75 81
76 for item in zin.infolist(): 82 for item in zin.infolist():
83 local_meta = dict() # type: Dict[str, Union[str, Dict]]
84 for k, v in self._get_zipinfo_meta(item).items():
85 local_meta[k] = v
86
77 if item.filename[-1] == '/': # pragma: no cover 87 if item.filename[-1] == '/': # pragma: no cover
78 # `is_dir` is added in Python3.6 88 # `is_dir` is added in Python3.6
79 continue # don't keep empty folders 89 continue # don't keep empty folders
@@ -81,11 +91,15 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
81 zin.extract(member=item, path=temp_folder) 91 zin.extract(member=item, path=temp_folder)
82 full_path = os.path.join(temp_folder, item.filename) 92 full_path = os.path.join(temp_folder, item.filename)
83 93
94 specific_meta = self._specific_get_meta(full_path, item.filename)
95 for (k, v) in specific_meta.items():
96 local_meta[k] = v
97
84 tmp_parser, _ = parser_factory.get_parser(full_path) # type: ignore 98 tmp_parser, _ = parser_factory.get_parser(full_path) # type: ignore
85 if not tmp_parser: 99 if tmp_parser:
86 continue 100 for k, v in tmp_parser.get_meta().items():
101 local_meta[k] = v
87 102
88 local_meta = tmp_parser.get_meta()
89 if local_meta: 103 if local_meta:
90 meta[item.filename] = local_meta 104 meta[item.filename] = local_meta
91 105