diff options
| author | jvoisin | 2018-04-01 00:17:06 +0200 |
|---|---|---|
| committer | jvoisin | 2018-04-01 00:17:06 +0200 |
| commit | c186fc42929b2660e5c507adeb8a8fb406593b11 (patch) | |
| tree | c82d071d61ceec3a20d48961390e73f6139f2136 /src | |
| parent | 6d506b87575ded3a59c9fc4f7b28d4160d9e9c43 (diff) | |
Clean deep metadata for zip files
Diffstat (limited to 'src')
| -rw-r--r-- | src/libreoffice.py | 12 | ||||
| -rw-r--r-- | src/office.py | 14 |
2 files changed, 24 insertions, 2 deletions
diff --git a/src/libreoffice.py b/src/libreoffice.py
index a3481a1..809ae3c 100644
--- a/src/libreoffice.py
+++ b/src/libreoffice.py
| @@ -34,6 +34,13 @@ class LibreOfficeParser(abstract.AbstractParser): | |||
| 34 | zipin.close() | 34 | zipin.close() |
| 35 | return metadata | 35 | return metadata |
| 36 | 36 | ||
| 37 | def __clean_zipinfo(self, zipinfo:zipfile.ZipInfo) -> zipfile.ZipInfo: | ||
| 38 | zipinfo.compress_type = zipfile.ZIP_DEFLATED | ||
| 39 | zipinfo.create_system = 3 # Linux | ||
| 40 | zipinfo.comment = b'' | ||
| 41 | zipinfo.date_time = (1980, 1, 1, 0, 0, 0) | ||
| 42 | return zipinfo | ||
| 43 | |||
| 37 | def remove_all(self): | 44 | def remove_all(self): |
| 38 | zin = zipfile.ZipFile(self.filename, 'r') | 45 | zin = zipfile.ZipFile(self.filename, 'r') |
| 39 | zout = zipfile.ZipFile(self.output_filename, 'w') | 46 | zout = zipfile.ZipFile(self.output_filename, 'w') |
| @@ -51,7 +58,10 @@ class LibreOfficeParser(abstract.AbstractParser): | |||
| 51 | print("%s isn't supported" % item.filename) | 58 | print("%s isn't supported" % item.filename) |
| 52 | continue | 59 | continue |
| 53 | tmp_parser.remove_all() | 60 | tmp_parser.remove_all() |
| 54 | zout.write(tmp_parser.output_filename, item.filename) | 61 | zinfo = zipfile.ZipInfo(item.filename) |
| 62 | item = self.__clean_zipinfo(item) | ||
| 63 | with open(tmp_parser.output_filename, 'rb') as f: | ||
| 64 | zout.writestr(zinfo, f.read()) | ||
| 55 | shutil.rmtree(temp_folder) | 65 | shutil.rmtree(temp_folder) |
| 56 | zout.close() | 66 | zout.close() |
| 57 | zin.close() | 67 | zin.close() |
diff --git a/src/office.py b/src/office.py
index 5de0597..a729f2f 100644
--- a/src/office.py
+++ b/src/office.py
| @@ -33,6 +33,13 @@ class OfficeParser(abstract.AbstractParser): | |||
| 33 | zipin.close() | 33 | zipin.close() |
| 34 | return metadata | 34 | return metadata |
| 35 | 35 | ||
| 36 | def __clean_zipinfo(self, zipinfo:zipfile.ZipInfo) -> zipfile.ZipInfo: | ||
| 37 | zipinfo.compress_type = zipfile.ZIP_DEFLATED | ||
| 38 | zipinfo.create_system = 3 # Linux | ||
| 39 | zipinfo.comment = b'' | ||
| 40 | zipinfo.date_time = (1980, 1, 1, 0, 0, 0) | ||
| 41 | return zipinfo | ||
| 42 | |||
| 36 | def remove_all(self): | 43 | def remove_all(self): |
| 37 | zin = zipfile.ZipFile(self.filename, 'r') | 44 | zin = zipfile.ZipFile(self.filename, 'r') |
| 38 | zout = zipfile.ZipFile(self.output_filename, 'w') | 45 | zout = zipfile.ZipFile(self.output_filename, 'w') |
| @@ -45,6 +52,7 @@ class OfficeParser(abstract.AbstractParser): | |||
| 45 | if not item.filename.endswith('.rels'): | 52 | if not item.filename.endswith('.rels'): |
| 46 | continue # don't keep metadata files | 53 | continue # don't keep metadata files |
| 47 | if item.filename in self.files_to_keep: | 54 | if item.filename in self.files_to_keep: |
| 55 | item = self.__clean_zipinfo(item) | ||
| 48 | zout.writestr(item, zin.read(item)) | 56 | zout.writestr(item, zin.read(item)) |
| 49 | continue | 57 | continue |
| 50 | 58 | ||
| @@ -54,7 +62,11 @@ class OfficeParser(abstract.AbstractParser): | |||
| 54 | print("%s isn't supported" % item.filename) | 62 | print("%s isn't supported" % item.filename) |
| 55 | continue | 63 | continue |
| 56 | tmp_parser.remove_all() | 64 | tmp_parser.remove_all() |
| 57 | zout.write(tmp_parser.output_filename, item.filename) | 65 | zinfo = zipfile.ZipInfo(item.filename) |
| 66 | item = self.__clean_zipinfo(item) | ||
| 67 | with open(tmp_parser.output_filename, 'rb') as f: | ||
| 68 | zout.writestr(zinfo, f.read()) | ||
| 69 | |||
| 58 | shutil.rmtree(temp_folder) | 70 | shutil.rmtree(temp_folder) |
| 59 | zout.close() | 71 | zout.close() |
| 60 | zin.close() | 72 | zin.close() |
