summaryrefslogtreecommitdiff
path: root/libmat2
diff options
context:
space:
mode:
author jvoisin 2018-10-25 11:29:50 +0200
committer jvoisin 2018-10-25 11:29:50 +0200
commit 513d897ea0cf3e006a2b33a89cdbf33cae3592cd (patch)
tree 73e3ffa11477b86f9cde3f36e763f83b8cf13117 /libmat2
parent 5a9dc388ade0604962cd86889dfd1658579539fa (diff)
Implement get_meta() for archives
Diffstat (limited to '')
-rw-r--r-- libmat2/archive.py 25
-rw-r--r-- libmat2/office.py 2
2 files changed, 26 insertions, 1 deletions
diff --git a/libmat2/archive.py b/libmat2/archive.py
index f788ecc..80e0bf2 100644
--- a/libmat2/archive.py
+++ b/libmat2/archive.py
@@ -67,6 +67,31 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
67 67
68 return metadata 68 return metadata
69 69
def get_meta(self) -> Dict[str, Union[str, dict]]:
    """Collect the metadata of the files stored inside the archive.

    Every non-directory member of the zip archive at `self.filename` is
    extracted into a temporary directory and handed to
    `parser_factory.get_parser`. Members for which no parser is returned,
    or whose parser reports no metadata, are left out of the result.

    Returns:
        A dict mapping each member's name inside the archive to whatever
        that member's parser returned from its own `get_meta()`.
    """
    meta = {}  # type: Dict[str, Union[str, dict]]

    # The temporary directory is managed by a context manager so that the
    # extracted files are removed even when extraction or one of the
    # nested parsers raises.
    with zipfile.ZipFile(self.filename) as zin, \
            tempfile.TemporaryDirectory() as temp_folder:
        for item in zin.infolist():
            if item.filename.endswith('/'):  # pragma: no cover
                # `is_dir` is added in Python3.6
                continue  # don't keep empty folders

            # `extract` returns the path it actually wrote to; member
            # names are sanitised on extraction, so re-joining the raw
            # name could point at a file that doesn't exist.
            full_path = zin.extract(member=item, path=temp_folder)

            tmp_parser, _ = parser_factory.get_parser(full_path)  # type: ignore
            if not tmp_parser:
                continue

            local_meta = tmp_parser.get_meta()
            if local_meta:
                meta[item.filename] = local_meta

    return meta
94
70 def remove_all(self) -> bool: 95 def remove_all(self) -> bool:
71 # pylint: disable=too-many-branches 96 # pylint: disable=too-many-branches
72 97
diff --git a/libmat2/office.py b/libmat2/office.py
index c10664f..e6370e7 100644
--- a/libmat2/office.py
+++ b/libmat2/office.py
@@ -301,7 +301,7 @@ class MSOfficeParser(ArchiveBasedAbstractParser):
301 Yes, I know that parsing xml with regexp ain't pretty, 301 Yes, I know that parsing xml with regexp ain't pretty,
302 be my guest and fix it if you want. 302 be my guest and fix it if you want.
303 """ 303 """
304 metadata = {} 304 metadata = super().get_meta()
305 zipin = zipfile.ZipFile(self.filename) 305 zipin = zipfile.ZipFile(self.filename)
306 for item in zipin.infolist(): 306 for item in zipin.infolist():
307 if item.filename.startswith('docProps/') and item.filename.endswith('.xml'): 307 if item.filename.startswith('docProps/') and item.filename.endswith('.xml'):