diff options
| author | jvoisin | 2018-10-25 11:29:50 +0200 |
|---|---|---|
| committer | jvoisin | 2018-10-25 11:29:50 +0200 |
| commit | 513d897ea0cf3e006a2b33a89cdbf33cae3592cd (patch) | |
| tree | 73e3ffa11477b86f9cde3f36e763f83b8cf13117 /libmat2 | |
| parent | 5a9dc388ade0604962cd86889dfd1658579539fa (diff) | |
Implement get_meta() for archives
Diffstat (limited to '')
| -rw-r--r-- | libmat2/archive.py | 25 | ||||
| -rw-r--r-- | libmat2/office.py | 2 |
2 files changed, 26 insertions, 1 deletions
diff --git a/libmat2/archive.py b/libmat2/archive.py index f788ecc..80e0bf2 100644 --- a/libmat2/archive.py +++ b/libmat2/archive.py | |||
| @@ -67,6 +67,31 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser): | |||
| 67 | 67 | ||
| 68 | return metadata | 68 | return metadata |
| 69 | 69 | ||
def get_meta(self) -> Dict[str, Union[str, dict]]:
    """Collect the metadata of the files stored inside the archive.

    Every non-directory member of the zip file at `self.filename` is
    extracted into a scratch directory and handed to
    `parser_factory.get_parser`. Members for which no parser is returned,
    or whose parser reports no metadata, are left out of the result.

    Returns:
        A dict mapping each member's name (as stored in the archive) to
        the metadata reported by that member's parser.
    """
    meta = dict()  # type: Dict[str, Union[str, dict]]

    # TemporaryDirectory guarantees that the extracted (and possibly
    # sensitive) content is removed even if extraction or one of the
    # nested parsers raises.
    with zipfile.ZipFile(self.filename) as zin, \
            tempfile.TemporaryDirectory() as temp_folder:
        for item in zin.infolist():
            if item.filename.endswith('/'):  # pragma: no cover
                # `is_dir` is added in Python3.6
                continue  # don't keep empty folders

            # extract() sanitises the member name (absolute paths, `..`),
            # so rely on the path it reports rather than rebuilding it.
            full_path = zin.extract(member=item, path=temp_folder)

            tmp_parser, _ = parser_factory.get_parser(full_path)  # type: ignore
            if not tmp_parser:
                continue

            local_meta = tmp_parser.get_meta()
            if local_meta:
                meta[item.filename] = local_meta

    return meta
| 94 | |||
| 70 | def remove_all(self) -> bool: | 95 | def remove_all(self) -> bool: |
| 71 | # pylint: disable=too-many-branches | 96 | # pylint: disable=too-many-branches |
| 72 | 97 | ||
diff --git a/libmat2/office.py b/libmat2/office.py index c10664f..e6370e7 100644 --- a/libmat2/office.py +++ b/libmat2/office.py | |||
| @@ -301,7 +301,7 @@ class MSOfficeParser(ArchiveBasedAbstractParser): | |||
| 301 | Yes, I know that parsing xml with regexp ain't pretty, | 301 | Yes, I know that parsing xml with regexp ain't pretty, |
| 302 | be my guest and fix it if you want. | 302 | be my guest and fix it if you want. |
| 303 | """ | 303 | """ |
| 304 | metadata = {} | 304 | metadata = super().get_meta() |
| 305 | zipin = zipfile.ZipFile(self.filename) | 305 | zipin = zipfile.ZipFile(self.filename) |
| 306 | for item in zipin.infolist(): | 306 | for item in zipin.infolist(): |
| 307 | if item.filename.startswith('docProps/') and item.filename.endswith('.xml'): | 307 | if item.filename.startswith('docProps/') and item.filename.endswith('.xml'): |
