diff options
| author | jvoisin | 2018-06-27 21:48:46 +0200 |
|---|---|---|
| committer | jvoisin | 2018-06-27 21:48:46 +0200 |
| commit | 177184ac671eebd2285561a39777dca2c9f70b04 (patch) | |
| tree | 6305121db3bf47c31c58e2588d3d3fc9d4014747 /libmat2 | |
| parent | f44769df4128239d34883bc225413654ff31dfc6 (diff) | |
Massively simplify how we're cleaning office files
Diffstat (limited to 'libmat2')
| -rw-r--r-- | libmat2/office.py | 59 |
1 files changed, 26 insertions, 33 deletions
diff --git a/libmat2/office.py b/libmat2/office.py
index aea56b9..e813fae 100644
--- a/libmat2/office.py
+++ b/libmat2/office.py
| @@ -47,45 +47,38 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser): | |||
| 47 | 47 | ||
| 48 | return metadata | 48 | return metadata |
| 49 | 49 | ||
| 50 | def remove_all(self) -> bool: | ||
| 51 | with zipfile.ZipFile(self.filename) as zin,\ | ||
| 52 | zipfile.ZipFile(self.output_filename, 'w') as zout: | ||
| 50 | 53 | ||
| 51 | def _clean_internal_file(self, item: zipfile.ZipInfo, temp_folder: str, | 54 | temp_folder = tempfile.mkdtemp() |
| 52 | zin: zipfile.ZipFile, zout: zipfile.ZipFile) -> bool: | ||
| 53 | zin.extract(member=item, path=temp_folder) | ||
| 54 | full_path = os.path.join(temp_folder, item.filename) | ||
| 55 | tmp_parser, mtype = parser_factory.get_parser(full_path) # type: ignore | ||
| 56 | if not tmp_parser: | ||
| 57 | zout.close() | ||
| 58 | os.remove(self.output_filename) | ||
| 59 | print("%s's format (%s) isn't supported" % (item.filename, mtype)) | ||
| 60 | return False | ||
| 61 | tmp_parser.remove_all() | ||
| 62 | 55 | ||
| 63 | zinfo = zipfile.ZipInfo(item.filename) # type: ignore | 56 | for item in zin.infolist(): |
| 64 | clean_zinfo = self._clean_zipinfo(zinfo) | 57 | if item.filename[-1] == '/': # `is_dir` is added in Python3.6 |
| 65 | with open(tmp_parser.output_filename, 'rb') as f: | 58 | continue # don't keep empty folders |
| 66 | zout.writestr(clean_zinfo, f.read()) | 59 | elif item.filename in self.files_to_keep: |
| 67 | return True | 60 | item = self._clean_zipinfo(item) |
| 61 | zout.writestr(item, zin.read(item)) | ||
| 62 | continue | ||
| 63 | elif any(map(lambda r: r.search(item.filename), self.files_to_omit)): | ||
| 64 | continue | ||
| 68 | 65 | ||
| 69 | def remove_all(self) -> bool: | 66 | zin.extract(member=item, path=temp_folder) |
| 70 | zin = zipfile.ZipFile(self.filename, 'r') | 67 | full_path = os.path.join(temp_folder, item.filename) |
| 71 | zout = zipfile.ZipFile(self.output_filename, 'w') | 68 | tmp_parser, mtype = parser_factory.get_parser(full_path) # type: ignore |
| 72 | temp_folder = tempfile.mkdtemp() | 69 | if not tmp_parser: |
| 70 | shutil.rmtree(temp_folder) | ||
| 71 | os.remove(self.output_filename) | ||
| 72 | print("%s's format (%s) isn't supported" % (item.filename, mtype)) | ||
| 73 | return False | ||
| 74 | tmp_parser.remove_all() | ||
| 73 | 75 | ||
| 74 | for item in zin.infolist(): | 76 | zinfo = zipfile.ZipInfo(item.filename) # type: ignore |
| 75 | if item.filename[-1] == '/': # `is_dir` is added in Python3.6 | 77 | clean_zinfo = self._clean_zipinfo(zinfo) |
| 76 | continue # don't keep empty folders | 78 | with open(tmp_parser.output_filename, 'rb') as f: |
| 77 | elif item.filename in self.files_to_keep: | 79 | zout.writestr(clean_zinfo, f.read()) |
| 78 | item = self._clean_zipinfo(item) | ||
| 79 | zout.writestr(item, zin.read(item)) | ||
| 80 | continue | ||
| 81 | elif any(map(lambda r: r.search(item.filename), self.files_to_omit)): | ||
| 82 | continue | ||
| 83 | elif not self._clean_internal_file(item, temp_folder, zin, zout): | ||
| 84 | return False | ||
| 85 | 80 | ||
| 86 | shutil.rmtree(temp_folder) | 81 | shutil.rmtree(temp_folder) |
| 87 | zout.close() | ||
| 88 | zin.close() | ||
| 89 | return True | 82 | return True |
| 90 | 83 | ||
| 91 | 84 | ||
